sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import (
    apply_index_offset,
    count_params,
    ensure_collection,
    ensure_list,
    seq_get,
)
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VAR_MAP expression from a flat ``[key, value, key, value, ...]`` list.

    Keys are taken from the even positions of *args* and values from the odd
    positions. An odd-length *args* raises ``IndexError`` (a trailing key with
    no paired value), matching the strict pairing of the original loop.
    """
    even_positions = range(0, len(args), 2)
    keys = [args[index] for index in even_positions]
    values = [args[index + 1] for index in even_positions]
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args):
    """Build a LIKE expression, adding an ESCAPE wrapper when a third
    argument (the escape character) is present.

    Note the operand order: in the function-call form the pattern comes
    first, so ``args[1]`` is the matched value and ``args[0]`` the pattern.
    """
    like_node = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like_node, expression=seq_get(args, 2))
    return like_node


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that builds *expr_type* as a binary range
    operator (e.g. LIKE, GLOB, RLIKE), with support for a trailing ESCAPE
    clause via ``Parser._parse_escape``.
    """

    def _parse_range(self, this):
        node = self.expression(expr_type, this=this, expression=self._parse_bitwise())
        return self._parse_escape(node)

    return _parse_range


class _Parser(type):
    """Metaclass for ``Parser`` that precomputes lookup tries.

    Multi-word SHOW/SET keywords (e.g. ``"CHARACTER SET"``) must be matched
    token by token, so each (sub)class gets tries built from the space-split
    keys of its ``SHOW_PARSERS`` and ``SET_PARSERS`` mappings.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
        return klass
67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.MAP, 113 TokenType.STRUCT, 114 TokenType.NULLABLE, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.SMALLINT, 122 TokenType.INT, 123 TokenType.BIGINT, 124 TokenType.FLOAT, 125 TokenType.DOUBLE, 126 TokenType.CHAR, 127 TokenType.NCHAR, 128 TokenType.VARCHAR, 129 TokenType.NVARCHAR, 130 
TokenType.TEXT, 131 TokenType.MEDIUMTEXT, 132 TokenType.LONGTEXT, 133 TokenType.MEDIUMBLOB, 134 TokenType.LONGBLOB, 135 TokenType.BINARY, 136 TokenType.VARBINARY, 137 TokenType.JSON, 138 TokenType.JSONB, 139 TokenType.INTERVAL, 140 TokenType.TIME, 141 TokenType.TIMESTAMP, 142 TokenType.TIMESTAMPTZ, 143 TokenType.TIMESTAMPLTZ, 144 TokenType.DATETIME, 145 TokenType.DATE, 146 TokenType.DECIMAL, 147 TokenType.UUID, 148 TokenType.GEOGRAPHY, 149 TokenType.GEOMETRY, 150 TokenType.HLLSKETCH, 151 TokenType.HSTORE, 152 TokenType.PSEUDO_TYPE, 153 TokenType.SUPER, 154 TokenType.SERIAL, 155 TokenType.SMALLSERIAL, 156 TokenType.BIGSERIAL, 157 TokenType.XML, 158 TokenType.UNIQUEIDENTIFIER, 159 TokenType.MONEY, 160 TokenType.SMALLMONEY, 161 TokenType.ROWVERSION, 162 TokenType.IMAGE, 163 TokenType.VARIANT, 164 TokenType.OBJECT, 165 TokenType.INET, 166 *NESTED_TYPE_TOKENS, 167 } 168 169 SUBQUERY_PREDICATES = { 170 TokenType.ANY: exp.Any, 171 TokenType.ALL: exp.All, 172 TokenType.EXISTS: exp.Exists, 173 TokenType.SOME: exp.Any, 174 } 175 176 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 177 178 DB_CREATABLES = { 179 TokenType.DATABASE, 180 TokenType.SCHEMA, 181 TokenType.TABLE, 182 TokenType.VIEW, 183 } 184 185 CREATABLES = { 186 TokenType.COLUMN, 187 TokenType.FUNCTION, 188 TokenType.INDEX, 189 TokenType.PROCEDURE, 190 *DB_CREATABLES, 191 } 192 193 ID_VAR_TOKENS = { 194 TokenType.VAR, 195 TokenType.ANTI, 196 TokenType.APPLY, 197 TokenType.AUTO_INCREMENT, 198 TokenType.BEGIN, 199 TokenType.BOTH, 200 TokenType.BUCKET, 201 TokenType.CACHE, 202 TokenType.CASCADE, 203 TokenType.COLLATE, 204 TokenType.COMMAND, 205 TokenType.COMMENT, 206 TokenType.COMMIT, 207 TokenType.COMPOUND, 208 TokenType.CONSTRAINT, 209 TokenType.DEFAULT, 210 TokenType.DELETE, 211 TokenType.DESCRIBE, 212 TokenType.DIV, 213 TokenType.END, 214 TokenType.EXECUTE, 215 TokenType.ESCAPE, 216 TokenType.FALSE, 217 TokenType.FIRST, 218 TokenType.FILTER, 219 TokenType.FOLLOWING, 220 TokenType.FORMAT, 
221 TokenType.FULL, 222 TokenType.IF, 223 TokenType.ISNULL, 224 TokenType.INTERVAL, 225 TokenType.LAZY, 226 TokenType.LEADING, 227 TokenType.LEFT, 228 TokenType.LOCAL, 229 TokenType.MATERIALIZED, 230 TokenType.MERGE, 231 TokenType.NATURAL, 232 TokenType.NEXT, 233 TokenType.OFFSET, 234 TokenType.ONLY, 235 TokenType.OPTIONS, 236 TokenType.ORDINALITY, 237 TokenType.PARTITION, 238 TokenType.PERCENT, 239 TokenType.PIVOT, 240 TokenType.PRAGMA, 241 TokenType.PRECEDING, 242 TokenType.RANGE, 243 TokenType.REFERENCES, 244 TokenType.RIGHT, 245 TokenType.ROW, 246 TokenType.ROWS, 247 TokenType.SEED, 248 TokenType.SEMI, 249 TokenType.SET, 250 TokenType.SHOW, 251 TokenType.SORTKEY, 252 TokenType.TEMPORARY, 253 TokenType.TOP, 254 TokenType.TRAILING, 255 TokenType.TRUE, 256 TokenType.UNBOUNDED, 257 TokenType.UNIQUE, 258 TokenType.UNLOGGED, 259 TokenType.UNPIVOT, 260 TokenType.VOLATILE, 261 TokenType.WINDOW, 262 *CREATABLES, 263 *SUBQUERY_PREDICATES, 264 *TYPE_TOKENS, 265 *NO_PAREN_FUNCTIONS, 266 } 267 268 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 269 TokenType.APPLY, 270 TokenType.FULL, 271 TokenType.LEFT, 272 TokenType.NATURAL, 273 TokenType.OFFSET, 274 TokenType.RIGHT, 275 TokenType.WINDOW, 276 } 277 278 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 279 280 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 281 282 FUNC_TOKENS = { 283 TokenType.COMMAND, 284 TokenType.CURRENT_DATE, 285 TokenType.CURRENT_DATETIME, 286 TokenType.CURRENT_TIMESTAMP, 287 TokenType.CURRENT_TIME, 288 TokenType.FILTER, 289 TokenType.FIRST, 290 TokenType.FORMAT, 291 TokenType.GLOB, 292 TokenType.IDENTIFIER, 293 TokenType.INDEX, 294 TokenType.ISNULL, 295 TokenType.ILIKE, 296 TokenType.LIKE, 297 TokenType.MERGE, 298 TokenType.OFFSET, 299 TokenType.PRIMARY_KEY, 300 TokenType.REPLACE, 301 TokenType.ROW, 302 TokenType.UNNEST, 303 TokenType.VAR, 304 TokenType.LEFT, 305 TokenType.RIGHT, 306 TokenType.DATE, 307 TokenType.DATETIME, 308 TokenType.TABLE, 309 TokenType.TIMESTAMP, 310 
TokenType.TIMESTAMPTZ, 311 TokenType.WINDOW, 312 *TYPE_TOKENS, 313 *SUBQUERY_PREDICATES, 314 } 315 316 CONJUNCTION = { 317 TokenType.AND: exp.And, 318 TokenType.OR: exp.Or, 319 } 320 321 EQUALITY = { 322 TokenType.EQ: exp.EQ, 323 TokenType.NEQ: exp.NEQ, 324 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 325 } 326 327 COMPARISON = { 328 TokenType.GT: exp.GT, 329 TokenType.GTE: exp.GTE, 330 TokenType.LT: exp.LT, 331 TokenType.LTE: exp.LTE, 332 } 333 334 BITWISE = { 335 TokenType.AMP: exp.BitwiseAnd, 336 TokenType.CARET: exp.BitwiseXor, 337 TokenType.PIPE: exp.BitwiseOr, 338 TokenType.DPIPE: exp.DPipe, 339 } 340 341 TERM = { 342 TokenType.DASH: exp.Sub, 343 TokenType.PLUS: exp.Add, 344 TokenType.MOD: exp.Mod, 345 TokenType.COLLATE: exp.Collate, 346 } 347 348 FACTOR = { 349 TokenType.DIV: exp.IntDiv, 350 TokenType.LR_ARROW: exp.Distance, 351 TokenType.SLASH: exp.Div, 352 TokenType.STAR: exp.Mul, 353 } 354 355 TIMESTAMPS = { 356 TokenType.TIME, 357 TokenType.TIMESTAMP, 358 TokenType.TIMESTAMPTZ, 359 TokenType.TIMESTAMPLTZ, 360 } 361 362 SET_OPERATIONS = { 363 TokenType.UNION, 364 TokenType.INTERSECT, 365 TokenType.EXCEPT, 366 } 367 368 JOIN_SIDES = { 369 TokenType.LEFT, 370 TokenType.RIGHT, 371 TokenType.FULL, 372 } 373 374 JOIN_KINDS = { 375 TokenType.INNER, 376 TokenType.OUTER, 377 TokenType.CROSS, 378 TokenType.SEMI, 379 TokenType.ANTI, 380 } 381 382 LAMBDAS = { 383 TokenType.ARROW: lambda self, expressions: self.expression( 384 exp.Lambda, 385 this=self._replace_lambda( 386 self._parse_conjunction(), 387 {node.name for node in expressions}, 388 ), 389 expressions=expressions, 390 ), 391 TokenType.FARROW: lambda self, expressions: self.expression( 392 exp.Kwarg, 393 this=exp.Var(this=expressions[0].name), 394 expression=self._parse_conjunction(), 395 ), 396 } 397 398 COLUMN_OPERATORS = { 399 TokenType.DOT: None, 400 TokenType.DCOLON: lambda self, this, to: self.expression( 401 exp.Cast, 402 this=this, 403 to=to, 404 ), 405 TokenType.ARROW: lambda self, this, path: 
self.expression( 406 exp.JSONExtract, 407 this=this, 408 expression=path, 409 ), 410 TokenType.DARROW: lambda self, this, path: self.expression( 411 exp.JSONExtractScalar, 412 this=this, 413 expression=path, 414 ), 415 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 416 exp.JSONBExtract, 417 this=this, 418 expression=path, 419 ), 420 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 421 exp.JSONBExtractScalar, 422 this=this, 423 expression=path, 424 ), 425 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 426 exp.JSONBContains, 427 this=this, 428 expression=key, 429 ), 430 } 431 432 EXPRESSION_PARSERS = { 433 exp.Column: lambda self: self._parse_column(), 434 exp.DataType: lambda self: self._parse_types(), 435 exp.From: lambda self: self._parse_from(), 436 exp.Group: lambda self: self._parse_group(), 437 exp.Identifier: lambda self: self._parse_id_var(), 438 exp.Lateral: lambda self: self._parse_lateral(), 439 exp.Join: lambda self: self._parse_join(), 440 exp.Order: lambda self: self._parse_order(), 441 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 442 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 443 exp.Lambda: lambda self: self._parse_lambda(), 444 exp.Limit: lambda self: self._parse_limit(), 445 exp.Offset: lambda self: self._parse_offset(), 446 exp.TableAlias: lambda self: self._parse_table_alias(), 447 exp.Table: lambda self: self._parse_table(), 448 exp.Condition: lambda self: self._parse_conjunction(), 449 exp.Expression: lambda self: self._parse_statement(), 450 exp.Properties: lambda self: self._parse_properties(), 451 exp.Where: lambda self: self._parse_where(), 452 exp.Ordered: lambda self: self._parse_ordered(), 453 exp.Having: lambda self: self._parse_having(), 454 exp.With: lambda self: self._parse_with(), 455 exp.Window: lambda self: self._parse_named_window(), 456 exp.Qualify: lambda self: self._parse_qualify(), 457 exp.Returning: lambda self: 
self._parse_returning(), 458 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 459 } 460 461 STATEMENT_PARSERS = { 462 TokenType.ALTER: lambda self: self._parse_alter(), 463 TokenType.BEGIN: lambda self: self._parse_transaction(), 464 TokenType.CACHE: lambda self: self._parse_cache(), 465 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 466 TokenType.COMMENT: lambda self: self._parse_comment(), 467 TokenType.CREATE: lambda self: self._parse_create(), 468 TokenType.DELETE: lambda self: self._parse_delete(), 469 TokenType.DESC: lambda self: self._parse_describe(), 470 TokenType.DESCRIBE: lambda self: self._parse_describe(), 471 TokenType.DROP: lambda self: self._parse_drop(), 472 TokenType.END: lambda self: self._parse_commit_or_rollback(), 473 TokenType.INSERT: lambda self: self._parse_insert(), 474 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 475 TokenType.MERGE: lambda self: self._parse_merge(), 476 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 477 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 478 TokenType.SET: lambda self: self._parse_set(), 479 TokenType.UNCACHE: lambda self: self._parse_uncache(), 480 TokenType.UPDATE: lambda self: self._parse_update(), 481 TokenType.USE: lambda self: self.expression( 482 exp.Use, 483 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 484 and exp.Var(this=self._prev.text), 485 this=self._parse_table(schema=False), 486 ), 487 } 488 489 UNARY_PARSERS = { 490 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 491 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 492 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 493 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 494 } 495 496 PRIMARY_PARSERS = { 497 TokenType.STRING: lambda self, token: self.expression( 498 exp.Literal, 
this=token.text, is_string=True 499 ), 500 TokenType.NUMBER: lambda self, token: self.expression( 501 exp.Literal, this=token.text, is_string=False 502 ), 503 TokenType.STAR: lambda self, _: self.expression( 504 exp.Star, 505 **{"except": self._parse_except(), "replace": self._parse_replace()}, 506 ), 507 TokenType.NULL: lambda self, _: self.expression(exp.Null), 508 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 509 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 510 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 511 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 512 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 513 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 514 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 515 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 516 } 517 518 PLACEHOLDER_PARSERS = { 519 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 520 TokenType.PARAMETER: lambda self: self._parse_parameter(), 521 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 522 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 523 else None, 524 } 525 526 RANGE_PARSERS = { 527 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 528 TokenType.GLOB: binary_range_parser(exp.Glob), 529 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 530 TokenType.IN: lambda self, this: self._parse_in(this), 531 TokenType.IS: lambda self, this: self._parse_is(this), 532 TokenType.LIKE: binary_range_parser(exp.Like), 533 TokenType.ILIKE: binary_range_parser(exp.ILike), 534 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 535 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 536 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 537 } 
538 539 PROPERTY_PARSERS = { 540 "AFTER": lambda self: self._parse_afterjournal( 541 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 542 ), 543 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 544 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 545 "BEFORE": lambda self: self._parse_journal( 546 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 547 ), 548 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 549 "CHARACTER SET": lambda self: self._parse_character_set(), 550 "CHECKSUM": lambda self: self._parse_checksum(), 551 "CLUSTER BY": lambda self: self.expression( 552 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 553 ), 554 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 555 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 556 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 557 default=self._prev.text.upper() == "DEFAULT" 558 ), 559 "DEFINER": lambda self: self._parse_definer(), 560 "DETERMINISTIC": lambda self: self.expression( 561 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 562 ), 563 "DISTKEY": lambda self: self._parse_distkey(), 564 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 565 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 566 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 567 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 568 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 569 "FREESPACE": lambda self: self._parse_freespace(), 570 "GLOBAL": lambda self: self._parse_temporary(global_=True), 571 "IMMUTABLE": lambda self: self.expression( 572 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 573 ), 574 "JOURNAL": lambda self: self._parse_journal( 575 
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 576 ), 577 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 578 "LIKE": lambda self: self._parse_create_like(), 579 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 580 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 581 "LOCK": lambda self: self._parse_locking(), 582 "LOCKING": lambda self: self._parse_locking(), 583 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 584 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 585 "MAX": lambda self: self._parse_datablocksize(), 586 "MAXIMUM": lambda self: self._parse_datablocksize(), 587 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 588 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 589 ), 590 "MIN": lambda self: self._parse_datablocksize(), 591 "MINIMUM": lambda self: self._parse_datablocksize(), 592 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 593 "NO": lambda self: self._parse_noprimaryindex(), 594 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 595 "ON": lambda self: self._parse_oncommit(), 596 "PARTITION BY": lambda self: self._parse_partitioned_by(), 597 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 598 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 599 "RETURNS": lambda self: self._parse_returns(), 600 "ROW": lambda self: self._parse_row(), 601 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 602 "SORTKEY": lambda self: self._parse_sortkey(), 603 "STABLE": lambda self: self.expression( 604 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 605 ), 606 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 607 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 608 "TBLPROPERTIES": 
lambda self: self._parse_wrapped_csv(self._parse_property), 609 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 610 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 611 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 612 "VOLATILE": lambda self: self.expression( 613 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 614 ), 615 "WITH": lambda self: self._parse_with_property(), 616 } 617 618 CONSTRAINT_PARSERS = { 619 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 620 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 621 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 622 "CHARACTER SET": lambda self: self.expression( 623 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 624 ), 625 "CHECK": lambda self: self.expression( 626 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 627 ), 628 "COLLATE": lambda self: self.expression( 629 exp.CollateColumnConstraint, this=self._parse_var() 630 ), 631 "COMMENT": lambda self: self.expression( 632 exp.CommentColumnConstraint, this=self._parse_string() 633 ), 634 "COMPRESS": lambda self: self._parse_compress(), 635 "DEFAULT": lambda self: self.expression( 636 exp.DefaultColumnConstraint, this=self._parse_bitwise() 637 ), 638 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 639 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 640 "FORMAT": lambda self: self.expression( 641 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 642 ), 643 "GENERATED": lambda self: self._parse_generated_as_identity(), 644 "IDENTITY": lambda self: self._parse_auto_increment(), 645 "INLINE": lambda self: self._parse_inline(), 646 "LIKE": lambda self: self._parse_create_like(), 647 "NOT": lambda self: self._parse_not_constraint(), 648 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
allow_null=True), 649 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 650 "PRIMARY KEY": lambda self: self._parse_primary_key(), 651 "TITLE": lambda self: self.expression( 652 exp.TitleColumnConstraint, this=self._parse_var_or_string() 653 ), 654 "UNIQUE": lambda self: self._parse_unique(), 655 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 656 } 657 658 ALTER_PARSERS = { 659 "ADD": lambda self: self._parse_alter_table_add(), 660 "ALTER": lambda self: self._parse_alter_table_alter(), 661 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 662 "DROP": lambda self: self._parse_alter_table_drop(), 663 "RENAME": lambda self: self._parse_alter_table_rename(), 664 } 665 666 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 667 668 NO_PAREN_FUNCTION_PARSERS = { 669 TokenType.CASE: lambda self: self._parse_case(), 670 TokenType.IF: lambda self: self._parse_if(), 671 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 672 } 673 674 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 675 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 676 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 677 "DECODE": lambda self: self._parse_decode(), 678 "EXTRACT": lambda self: self._parse_extract(), 679 "JSON_OBJECT": lambda self: self._parse_json_object(), 680 "LOG": lambda self: self._parse_logarithm(), 681 "MATCH": lambda self: self._parse_match_against(), 682 "POSITION": lambda self: self._parse_position(), 683 "STRING_AGG": lambda self: self._parse_string_agg(), 684 "SUBSTRING": lambda self: self._parse_substring(), 685 "TRIM": lambda self: self._parse_trim(), 686 "TRY_CAST": lambda self: self._parse_cast(False), 687 "TRY_CONVERT": lambda self: self._parse_convert(False), 688 } 689 690 QUERY_MODIFIER_PARSERS = { 691 "match": lambda self: self._parse_match_recognize(), 692 "where": lambda self: 
self._parse_where(), 693 "group": lambda self: self._parse_group(), 694 "having": lambda self: self._parse_having(), 695 "qualify": lambda self: self._parse_qualify(), 696 "windows": lambda self: self._parse_window_clause(), 697 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 698 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 699 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 700 "order": lambda self: self._parse_order(), 701 "limit": lambda self: self._parse_limit(), 702 "offset": lambda self: self._parse_offset(), 703 "lock": lambda self: self._parse_lock(), 704 "sample": lambda self: self._parse_table_sample(as_modifier=True), 705 } 706 707 SET_PARSERS = { 708 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 709 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 710 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 711 "TRANSACTION": lambda self: self._parse_set_transaction(), 712 } 713 714 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 715 716 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 717 718 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 719 720 TRANSACTION_CHARACTERISTICS = { 721 "ISOLATION LEVEL REPEATABLE READ", 722 "ISOLATION LEVEL READ COMMITTED", 723 "ISOLATION LEVEL READ UNCOMMITTED", 724 "ISOLATION LEVEL SERIALIZABLE", 725 "READ WRITE", 726 "READ ONLY", 727 } 728 729 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 730 731 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 732 733 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 734 735 STRICT_CAST = True 736 737 CONVERT_TYPE_FIRST = False 738 739 LOG_BASE_FIRST = True 740 LOG_DEFAULTS_TO_LN = False 741 742 __slots__ = ( 743 "error_level", 744 "error_message_context", 745 "sql", 746 "errors", 747 "index_offset", 748 "unnest_column_only", 749 "alias_post_tablesample", 750 
"max_errors", 751 "null_ordering", 752 "_tokens", 753 "_index", 754 "_curr", 755 "_next", 756 "_prev", 757 "_prev_comments", 758 "_show_trie", 759 "_set_trie", 760 ) 761 762 def __init__( 763 self, 764 error_level: t.Optional[ErrorLevel] = None, 765 error_message_context: int = 100, 766 index_offset: int = 0, 767 unnest_column_only: bool = False, 768 alias_post_tablesample: bool = False, 769 max_errors: int = 3, 770 null_ordering: t.Optional[str] = None, 771 ): 772 self.error_level = error_level or ErrorLevel.IMMEDIATE 773 self.error_message_context = error_message_context 774 self.index_offset = index_offset 775 self.unnest_column_only = unnest_column_only 776 self.alias_post_tablesample = alias_post_tablesample 777 self.max_errors = max_errors 778 self.null_ordering = null_ordering 779 self.reset() 780 781 def reset(self): 782 self.sql = "" 783 self.errors = [] 784 self._tokens = [] 785 self._index = 0 786 self._curr = None 787 self._next = None 788 self._prev = None 789 self._prev_comments = None 790 791 def parse( 792 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 793 ) -> t.List[t.Optional[exp.Expression]]: 794 """ 795 Parses a list of tokens and returns a list of syntax trees, one tree 796 per parsed SQL statement. 797 798 Args: 799 raw_tokens: the list of tokens. 800 sql: the original SQL string, used to produce helpful debug messages. 801 802 Returns: 803 The list of syntax trees. 804 """ 805 return self._parse( 806 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 807 ) 808 809 def parse_into( 810 self, 811 expression_types: exp.IntoType, 812 raw_tokens: t.List[Token], 813 sql: t.Optional[str] = None, 814 ) -> t.List[t.Optional[exp.Expression]]: 815 """ 816 Parses a list of tokens into a given Expression type. If a collection of Expression 817 types is given instead, this method will try to parse the token list into each one 818 of them, stopping at the first for which the parsing succeeds. 
819 820 Args: 821 expression_types: the expression type(s) to try and parse the token list into. 822 raw_tokens: the list of tokens. 823 sql: the original SQL string, used to produce helpful debug messages. 824 825 Returns: 826 The target Expression. 827 """ 828 errors = [] 829 for expression_type in ensure_collection(expression_types): 830 parser = self.EXPRESSION_PARSERS.get(expression_type) 831 if not parser: 832 raise TypeError(f"No parser registered for {expression_type}") 833 try: 834 return self._parse(parser, raw_tokens, sql) 835 except ParseError as e: 836 e.errors[0]["into_expression"] = expression_type 837 errors.append(e) 838 raise ParseError( 839 f"Failed to parse into {expression_types}", 840 errors=merge_errors(errors), 841 ) from errors[-1] 842 843 def _parse( 844 self, 845 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 846 raw_tokens: t.List[Token], 847 sql: t.Optional[str] = None, 848 ) -> t.List[t.Optional[exp.Expression]]: 849 self.reset() 850 self.sql = sql or "" 851 total = len(raw_tokens) 852 chunks: t.List[t.List[Token]] = [[]] 853 854 for i, token in enumerate(raw_tokens): 855 if token.token_type == TokenType.SEMICOLON: 856 if i < total - 1: 857 chunks.append([]) 858 else: 859 chunks[-1].append(token) 860 861 expressions = [] 862 863 for tokens in chunks: 864 self._index = -1 865 self._tokens = tokens 866 self._advance() 867 868 expressions.append(parse_method(self)) 869 870 if self._index < len(self._tokens): 871 self.raise_error("Invalid expression / Unexpected token") 872 873 self.check_errors() 874 875 return expressions 876 877 def check_errors(self) -> None: 878 """ 879 Logs or raises any found errors, depending on the chosen error level setting. 
        """
        # NOTE(review): this is the tail of a method that starts above this chunk
        # (it reports/raises self.errors according to self.error_level).
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        # Capture the SQL surrounding the offending token so the message can underline it.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises right away; other levels accumulate for check_errors.
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Comments collected while advancing are attached to the next expression built,
        # unless explicit comments were passed in.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the raw SQL text spanning from the first to the last token, inclusive.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Translate the token's (line, col) position into an absolute character
        # offset into self.sql by scanning the text and counting line breaks.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Move the cursor and refresh the current/next/previous token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback: wrap the previous token plus the rest of the statement as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON <kind> <name> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown target kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch table first, then raw
        # commands, then a bare expression / SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        # DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <table> [CASCADE] [CONSTRAINTS]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
        )

    # (signature continues on the next physical line of this listing)
    def
_parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # IF [NOT] EXISTS — truthy only when the whole sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        # CREATE [OR REPLACE] [UNIQUE] [VOLATILE] <kind> ... — falls back to an
        # opaque Command when the creatable kind cannot be determined.
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: consume TABLE so FUNCTION becomes the creatable.
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            volatile=volatile,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Dispatch on the registered property keyword, then try the special
        # two-token forms, then a generic key = value assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # <keyword> [=|AS] <var | string | number | identifier>
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collect consecutive properties until one fails to parse.
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # [NO] FALLBACK [PROTECTION]
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) | WITH JOURNAL ... | WITH [NO] DATA | WITH ... ISOLATED LOADING
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        # [NO] LOG
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # [NO | DUAL] [BEFORE] JOURNAL
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # [NOT] [LOCAL] AFTER JOURNAL
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = {ON | OFF | DEFAULT}
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # MERGEBLOCKRATIO [= <number> [PERCENT]]
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # [DEFAULT | MIN[IMUM] | MAX[IMUM]] DATABLOCKSIZE [= <size> [<units>]]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        # LOCKING {TABLE | VIEW | ROW | DATABASE} [<name>] {FOR | IN} <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # PARTITION BY <expr>, ... — empty list when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    # (body continues on the next physical line of this listing)
    def _parse_noprimaryindex(self) -> exp.Expression:
self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        # ON COMMIT PRESERVE ROWS
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        # DISTKEY ( <identifier> )
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        # LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        # [COMPOUND] SORTKEY ( <identifier>, ... )
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        # [DEFAULT] CHARACTER SET [=] <name>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<schema>] | RETURNS TABLE < kwargs >
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        # DESCRIBE [<kind>] <table>
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # INSERT [OVERWRITE] [LOCAL] {DIRECTORY <path> | [OR <alt>] [INTO] [TABLE] <table>} ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        # RETURNING <column>, ...
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # ROW FORMAT SERDE <string> | ROW FORMAT DELIMITED <options> (Hive syntax).
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        # LOAD DATA [LOCAL] INPATH <string> [OVERWRITE] INTO TABLE <table> ...
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        # DELETE FROM <table> [USING ...] [WHERE ...] [RETURNING ...]
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...]
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        # UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        # PARTITION ( <expr>, ... )
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # One row of a VALUES clause: ( <expr>, ... ) or a bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parse a query: leading WITH, SELECT, parenthesized subquery (when
        # nested/table), or VALUES; then fold in any trailing set operations.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        # WITH [RECURSIVE] <cte> [, <cte> ...]
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        # <alias> AS ( <statement> )
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # [AS] <name> [( <column>, ... )]
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis wasn't actually a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        # Attach laterals/joins/comma-joins and all registered modifier clauses
        # (WHERE, GROUP BY, etc.) to a modifiable node, in place.
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma join: extend the FROM clause with another table.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        # /*+ hint(...) */ style hints
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        # INTO [TEMPORARY | UNLOGGED] [TABLE] <table>
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        # FROM <table>, ...
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        # MATCH_RECOGNIZE ( [PARTITION BY ...] [ORDER BY ...] [MEASURES ...]
        #   [rows-per-match] [after-match-skip] [PATTERN (...)] [DEFINE ...] )
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by scanning tokens and balancing parens.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            # The raw SQL between the balanced parens becomes the pattern text.
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        # LATERAL [VIEW] ... | CROSS APPLY ... | OUTER APPLY ...
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY forms are represented as joins; OUTER APPLY maps to a LEFT side.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (NATURAL, side, kind) tokens; any element may be falsy.
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        # [NATURAL] [<side>] [<kind>] JOIN <table> [ON ... | USING (...)]
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        # <index name> ON [TABLE] <table> <column expression>
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        # [UNIQUE] [PRIMARY] [AMP] INDEX <name> [( <column>, ... )]
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )
2091 2092 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2093 catalog = None 2094 db = None 2095 2096 table = ( 2097 (not schema and self._parse_function()) 2098 or self._parse_id_var(any_token=False) 2099 or self._parse_string_as_identifier() 2100 ) 2101 2102 while self._match(TokenType.DOT): 2103 if catalog: 2104 # This allows nesting the table in arbitrarily many dot expressions if needed 2105 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2106 else: 2107 catalog = db 2108 db = table 2109 table = self._parse_id_var() 2110 2111 if not table: 2112 self.raise_error(f"Expected table name but got {self._curr}") 2113 2114 return self.expression( 2115 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2116 ) 2117 2118 def _parse_table( 2119 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2120 ) -> t.Optional[exp.Expression]: 2121 lateral = self._parse_lateral() 2122 2123 if lateral: 2124 return lateral 2125 2126 unnest = self._parse_unnest() 2127 2128 if unnest: 2129 return unnest 2130 2131 values = self._parse_derived_table_values() 2132 2133 if values: 2134 return values 2135 2136 subquery = self._parse_select(table=True) 2137 2138 if subquery: 2139 if not subquery.args.get("pivots"): 2140 subquery.set("pivots", self._parse_pivots()) 2141 return subquery 2142 2143 this = self._parse_table_parts(schema=schema) 2144 2145 if schema: 2146 return self._parse_schema(this=this) 2147 2148 if self.alias_post_tablesample: 2149 table_sample = self._parse_table_sample() 2150 2151 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2152 2153 if alias: 2154 this.set("alias", alias) 2155 2156 if not this.args.get("pivots"): 2157 this.set("pivots", self._parse_pivots()) 2158 2159 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2160 this.set( 2161 "hints", 2162 self._parse_csv(lambda: self._parse_function() or 
self._parse_var(any_token=True)), 2163 ) 2164 self._match_r_paren() 2165 2166 if not self.alias_post_tablesample: 2167 table_sample = self._parse_table_sample() 2168 2169 if table_sample: 2170 table_sample.set("this", this) 2171 this = table_sample 2172 2173 return this 2174 2175 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2176 if not self._match(TokenType.UNNEST): 2177 return None 2178 2179 expressions = self._parse_wrapped_csv(self._parse_column) 2180 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2181 alias = self._parse_table_alias() 2182 2183 if alias and self.unnest_column_only: 2184 if alias.args.get("columns"): 2185 self.raise_error("Unexpected extra column alias in unnest.") 2186 alias.set("columns", [alias.this]) 2187 alias.set("this", None) 2188 2189 offset = None 2190 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2191 self._match(TokenType.ALIAS) 2192 offset = self._parse_conjunction() 2193 2194 return self.expression( 2195 exp.Unnest, 2196 expressions=expressions, 2197 ordinality=ordinality, 2198 alias=alias, 2199 offset=offset, 2200 ) 2201 2202 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2203 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2204 if not is_derived and not self._match(TokenType.VALUES): 2205 return None 2206 2207 expressions = self._parse_csv(self._parse_value) 2208 2209 if is_derived: 2210 self._match_r_paren() 2211 2212 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2213 2214 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2215 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2216 as_modifier and self._match_text_seq("USING", "SAMPLE") 2217 ): 2218 return None 2219 2220 bucket_numerator = None 2221 bucket_denominator = None 2222 bucket_field = None 2223 percent = None 2224 rows = None 2225 size = None 2226 seed = None 2227 2228 kind = 
"TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2229 method = self._parse_var(tokens=(TokenType.ROW,)) 2230 2231 self._match(TokenType.L_PAREN) 2232 2233 num = self._parse_number() 2234 2235 if self._match(TokenType.BUCKET): 2236 bucket_numerator = self._parse_number() 2237 self._match(TokenType.OUT_OF) 2238 bucket_denominator = bucket_denominator = self._parse_number() 2239 self._match(TokenType.ON) 2240 bucket_field = self._parse_field() 2241 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2242 percent = num 2243 elif self._match(TokenType.ROWS): 2244 rows = num 2245 else: 2246 size = num 2247 2248 self._match(TokenType.R_PAREN) 2249 2250 if self._match(TokenType.L_PAREN): 2251 method = self._parse_var() 2252 seed = self._match(TokenType.COMMA) and self._parse_number() 2253 self._match_r_paren() 2254 elif self._match_texts(("SEED", "REPEATABLE")): 2255 seed = self._parse_wrapped(self._parse_number) 2256 2257 return self.expression( 2258 exp.TableSample, 2259 method=method, 2260 bucket_numerator=bucket_numerator, 2261 bucket_denominator=bucket_denominator, 2262 bucket_field=bucket_field, 2263 percent=percent, 2264 rows=rows, 2265 size=size, 2266 seed=seed, 2267 kind=kind, 2268 ) 2269 2270 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2271 return list(iter(self._parse_pivot, None)) 2272 2273 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2274 index = self._index 2275 2276 if self._match(TokenType.PIVOT): 2277 unpivot = False 2278 elif self._match(TokenType.UNPIVOT): 2279 unpivot = True 2280 else: 2281 return None 2282 2283 expressions = [] 2284 field = None 2285 2286 if not self._match(TokenType.L_PAREN): 2287 self._retreat(index) 2288 return None 2289 2290 if unpivot: 2291 expressions = self._parse_csv(self._parse_column) 2292 else: 2293 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2294 2295 if not self._match(TokenType.FOR): 2296 self.raise_error("Expecting 
FOR") 2297 2298 value = self._parse_column() 2299 2300 if not self._match(TokenType.IN): 2301 self.raise_error("Expecting IN") 2302 2303 field = self._parse_in(value) 2304 2305 self._match_r_paren() 2306 2307 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2308 2309 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2310 pivot.set("alias", self._parse_table_alias()) 2311 2312 return pivot 2313 2314 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2315 if not skip_where_token and not self._match(TokenType.WHERE): 2316 return None 2317 2318 return self.expression( 2319 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2320 ) 2321 2322 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2323 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2324 return None 2325 2326 elements = defaultdict(list) 2327 2328 while True: 2329 expressions = self._parse_csv(self._parse_conjunction) 2330 if expressions: 2331 elements["expressions"].extend(expressions) 2332 2333 grouping_sets = self._parse_grouping_sets() 2334 if grouping_sets: 2335 elements["grouping_sets"].extend(grouping_sets) 2336 2337 rollup = None 2338 cube = None 2339 2340 with_ = self._match(TokenType.WITH) 2341 if self._match(TokenType.ROLLUP): 2342 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2343 elements["rollup"].extend(ensure_list(rollup)) 2344 2345 if self._match(TokenType.CUBE): 2346 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2347 elements["cube"].extend(ensure_list(cube)) 2348 2349 if not (expressions or grouping_sets or rollup or cube): 2350 break 2351 2352 return self.expression(exp.Group, **elements) # type: ignore 2353 2354 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2355 if not self._match(TokenType.GROUPING_SETS): 2356 return None 2357 2358 return 
self._parse_wrapped_csv(self._parse_grouping_set) 2359 2360 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2361 if self._match(TokenType.L_PAREN): 2362 grouping_set = self._parse_csv(self._parse_column) 2363 self._match_r_paren() 2364 return self.expression(exp.Tuple, expressions=grouping_set) 2365 2366 return self._parse_column() 2367 2368 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2369 if not skip_having_token and not self._match(TokenType.HAVING): 2370 return None 2371 return self.expression(exp.Having, this=self._parse_conjunction()) 2372 2373 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2374 if not self._match(TokenType.QUALIFY): 2375 return None 2376 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2377 2378 def _parse_order( 2379 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2380 ) -> t.Optional[exp.Expression]: 2381 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2382 return this 2383 2384 return self.expression( 2385 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2386 ) 2387 2388 def _parse_sort( 2389 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2390 ) -> t.Optional[exp.Expression]: 2391 if not self._match(token_type): 2392 return None 2393 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2394 2395 def _parse_ordered(self) -> exp.Expression: 2396 this = self._parse_conjunction() 2397 self._match(TokenType.ASC) 2398 is_desc = self._match(TokenType.DESC) 2399 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2400 is_nulls_last = self._match(TokenType.NULLS_LAST) 2401 desc = is_desc or False 2402 asc = not desc 2403 nulls_first = is_nulls_first or False 2404 explicitly_null_ordered = is_nulls_first or is_nulls_last 2405 if ( 2406 not explicitly_null_ordered 2407 and ( 2408 (asc and self.null_ordering == "nulls_are_small") 2409 or (desc and 
self.null_ordering != "nulls_are_small") 2410 ) 2411 and self.null_ordering != "nulls_are_last" 2412 ): 2413 nulls_first = True 2414 2415 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2416 2417 def _parse_limit( 2418 self, this: t.Optional[exp.Expression] = None, top: bool = False 2419 ) -> t.Optional[exp.Expression]: 2420 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2421 limit_paren = self._match(TokenType.L_PAREN) 2422 limit_exp = self.expression( 2423 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2424 ) 2425 2426 if limit_paren: 2427 self._match_r_paren() 2428 2429 return limit_exp 2430 2431 if self._match(TokenType.FETCH): 2432 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2433 direction = self._prev.text if direction else "FIRST" 2434 count = self._parse_number() 2435 self._match_set((TokenType.ROW, TokenType.ROWS)) 2436 self._match(TokenType.ONLY) 2437 return self.expression(exp.Fetch, direction=direction, count=count) 2438 2439 return this 2440 2441 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2442 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2443 return this 2444 2445 count = self._parse_number() 2446 self._match_set((TokenType.ROW, TokenType.ROWS)) 2447 return self.expression(exp.Offset, this=this, expression=count) 2448 2449 def _parse_lock(self) -> t.Optional[exp.Expression]: 2450 if self._match_text_seq("FOR", "UPDATE"): 2451 return self.expression(exp.Lock, update=True) 2452 if self._match_text_seq("FOR", "SHARE"): 2453 return self.expression(exp.Lock, update=False) 2454 2455 return None 2456 2457 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2458 if not self._match_set(self.SET_OPERATIONS): 2459 return this 2460 2461 token_type = self._prev.token_type 2462 2463 if token_type == TokenType.UNION: 2464 expression = exp.Union 2465 
elif token_type == TokenType.EXCEPT: 2466 expression = exp.Except 2467 else: 2468 expression = exp.Intersect 2469 2470 return self.expression( 2471 expression, 2472 this=this, 2473 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2474 expression=self._parse_set_operations(self._parse_select(nested=True)), 2475 ) 2476 2477 def _parse_expression(self) -> t.Optional[exp.Expression]: 2478 return self._parse_alias(self._parse_conjunction()) 2479 2480 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2481 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2482 2483 def _parse_equality(self) -> t.Optional[exp.Expression]: 2484 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2485 2486 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2487 return self._parse_tokens(self._parse_range, self.COMPARISON) 2488 2489 def _parse_range(self) -> t.Optional[exp.Expression]: 2490 this = self._parse_bitwise() 2491 negate = self._match(TokenType.NOT) 2492 2493 if self._match_set(self.RANGE_PARSERS): 2494 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2495 elif self._match(TokenType.ISNULL): 2496 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2497 2498 # Postgres supports ISNULL and NOTNULL for conditions. 
2499 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2500 if self._match(TokenType.NOTNULL): 2501 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2502 this = self.expression(exp.Not, this=this) 2503 2504 if negate: 2505 this = self.expression(exp.Not, this=this) 2506 2507 if self._match(TokenType.IS): 2508 this = self._parse_is(this) 2509 2510 return this 2511 2512 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2513 negate = self._match(TokenType.NOT) 2514 if self._match(TokenType.DISTINCT_FROM): 2515 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2516 return self.expression(klass, this=this, expression=self._parse_expression()) 2517 2518 this = self.expression( 2519 exp.Is, 2520 this=this, 2521 expression=self._parse_null() or self._parse_boolean(), 2522 ) 2523 return self.expression(exp.Not, this=this) if negate else this 2524 2525 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2526 unnest = self._parse_unnest() 2527 if unnest: 2528 this = self.expression(exp.In, this=this, unnest=unnest) 2529 elif self._match(TokenType.L_PAREN): 2530 expressions = self._parse_csv(self._parse_select_or_expression) 2531 2532 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2533 this = self.expression(exp.In, this=this, query=expressions[0]) 2534 else: 2535 this = self.expression(exp.In, this=this, expressions=expressions) 2536 2537 self._match_r_paren() 2538 else: 2539 this = self.expression(exp.In, this=this, field=self._parse_field()) 2540 2541 return this 2542 2543 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2544 low = self._parse_bitwise() 2545 self._match(TokenType.AND) 2546 high = self._parse_bitwise() 2547 return self.expression(exp.Between, this=this, low=low, high=high) 2548 2549 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2550 if not self._match(TokenType.ESCAPE): 2551 return this 2552 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2553 2554 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2555 this = self._parse_term() 2556 2557 while True: 2558 if self._match_set(self.BITWISE): 2559 this = self.expression( 2560 self.BITWISE[self._prev.token_type], 2561 this=this, 2562 expression=self._parse_term(), 2563 ) 2564 elif self._match_pair(TokenType.LT, TokenType.LT): 2565 this = self.expression( 2566 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2567 ) 2568 elif self._match_pair(TokenType.GT, TokenType.GT): 2569 this = self.expression( 2570 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2571 ) 2572 else: 2573 break 2574 2575 return this 2576 2577 def _parse_term(self) -> t.Optional[exp.Expression]: 2578 return self._parse_tokens(self._parse_factor, self.TERM) 2579 2580 def _parse_factor(self) -> t.Optional[exp.Expression]: 2581 return self._parse_tokens(self._parse_unary, self.FACTOR) 2582 2583 def _parse_unary(self) -> t.Optional[exp.Expression]: 2584 if self._match_set(self.UNARY_PARSERS): 2585 return self.UNARY_PARSERS[self._prev.token_type](self) 2586 return self._parse_at_time_zone(self._parse_type()) 2587 2588 def _parse_type(self) -> t.Optional[exp.Expression]: 2589 if self._match(TokenType.INTERVAL): 2590 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field()) 2591 2592 index = self._index 2593 type_token = self._parse_types(check_func=True) 2594 this = self._parse_column() 2595 2596 if type_token: 2597 if isinstance(this, exp.Literal): 2598 return self.expression(exp.Cast, this=this, to=type_token) 2599 if not type_token.args.get("expressions"): 2600 self._retreat(index) 2601 return self._parse_column() 2602 return type_token 2603 2604 return this 2605 2606 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2607 index = self._index 2608 2609 prefix = self._match_text_seq("SYSUDTLIB", ".") 2610 2611 if not 
self._match_set(self.TYPE_TOKENS): 2612 return None 2613 2614 type_token = self._prev.token_type 2615 2616 if type_token == TokenType.PSEUDO_TYPE: 2617 return self.expression(exp.PseudoType, this=self._prev.text) 2618 2619 nested = type_token in self.NESTED_TYPE_TOKENS 2620 is_struct = type_token == TokenType.STRUCT 2621 expressions = None 2622 maybe_func = False 2623 2624 if self._match(TokenType.L_PAREN): 2625 if is_struct: 2626 expressions = self._parse_csv(self._parse_struct_kwargs) 2627 elif nested: 2628 expressions = self._parse_csv(self._parse_types) 2629 else: 2630 expressions = self._parse_csv(self._parse_conjunction) 2631 2632 if not expressions: 2633 self._retreat(index) 2634 return None 2635 2636 self._match_r_paren() 2637 maybe_func = True 2638 2639 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2640 this = exp.DataType( 2641 this=exp.DataType.Type.ARRAY, 2642 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2643 nested=True, 2644 ) 2645 2646 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2647 this = exp.DataType( 2648 this=exp.DataType.Type.ARRAY, 2649 expressions=[this], 2650 nested=True, 2651 ) 2652 2653 return this 2654 2655 if self._match(TokenType.L_BRACKET): 2656 self._retreat(index) 2657 return None 2658 2659 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2660 if nested and self._match(TokenType.LT): 2661 if is_struct: 2662 expressions = self._parse_csv(self._parse_struct_kwargs) 2663 else: 2664 expressions = self._parse_csv(self._parse_types) 2665 2666 if not self._match(TokenType.GT): 2667 self.raise_error("Expecting >") 2668 2669 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2670 values = self._parse_csv(self._parse_conjunction) 2671 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2672 2673 value: t.Optional[exp.Expression] = None 2674 if type_token in self.TIMESTAMPS: 2675 if self._match(TokenType.WITH_TIME_ZONE) or type_token == 
TokenType.TIMESTAMPTZ: 2676 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2677 elif ( 2678 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2679 ): 2680 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2681 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2682 if type_token == TokenType.TIME: 2683 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2684 else: 2685 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2686 2687 maybe_func = maybe_func and value is None 2688 2689 if value is None: 2690 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2691 elif type_token == TokenType.INTERVAL: 2692 unit = self._parse_var() 2693 2694 if not unit: 2695 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2696 else: 2697 value = self.expression(exp.Interval, unit=unit) 2698 2699 if maybe_func and check_func: 2700 index2 = self._index 2701 peek = self._parse_string() 2702 2703 if not peek: 2704 self._retreat(index) 2705 return None 2706 2707 self._retreat(index2) 2708 2709 if value: 2710 return value 2711 2712 return exp.DataType( 2713 this=exp.DataType.Type[type_token.value.upper()], 2714 expressions=expressions, 2715 nested=nested, 2716 values=values, 2717 prefix=prefix, 2718 ) 2719 2720 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2721 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2722 return self._parse_types() 2723 2724 this = self._parse_id_var() 2725 self._match(TokenType.COLON) 2726 data_type = self._parse_types() 2727 2728 if not data_type: 2729 return None 2730 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2731 2732 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2733 if not self._match(TokenType.AT_TIME_ZONE): 2734 return this 2735 return 
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2736 2737 def _parse_column(self) -> t.Optional[exp.Expression]: 2738 this = self._parse_field() 2739 if isinstance(this, exp.Identifier): 2740 this = self.expression(exp.Column, this=this) 2741 elif not this: 2742 return self._parse_bracket(this) 2743 this = self._parse_bracket(this) 2744 2745 while self._match_set(self.COLUMN_OPERATORS): 2746 op_token = self._prev.token_type 2747 op = self.COLUMN_OPERATORS.get(op_token) 2748 2749 if op_token == TokenType.DCOLON: 2750 field = self._parse_types() 2751 if not field: 2752 self.raise_error("Expected type") 2753 elif op: 2754 self._advance() 2755 value = self._prev.text 2756 field = ( 2757 exp.Literal.number(value) 2758 if self._prev.token_type == TokenType.NUMBER 2759 else exp.Literal.string(value) 2760 ) 2761 else: 2762 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2763 2764 if isinstance(field, exp.Func): 2765 # bigquery allows function calls like x.y.count(...) 2766 # SAFE.SUBSTR(...) 
2767 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2768 this = self._replace_columns_with_dots(this) 2769 2770 if op: 2771 this = op(self, this, field) 2772 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2773 this = self.expression( 2774 exp.Column, 2775 this=field, 2776 table=this.this, 2777 db=this.args.get("table"), 2778 catalog=this.args.get("db"), 2779 ) 2780 else: 2781 this = self.expression(exp.Dot, this=this, expression=field) 2782 this = self._parse_bracket(this) 2783 2784 return this 2785 2786 def _parse_primary(self) -> t.Optional[exp.Expression]: 2787 if self._match_set(self.PRIMARY_PARSERS): 2788 token_type = self._prev.token_type 2789 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2790 2791 if token_type == TokenType.STRING: 2792 expressions = [primary] 2793 while self._match(TokenType.STRING): 2794 expressions.append(exp.Literal.string(self._prev.text)) 2795 if len(expressions) > 1: 2796 return self.expression(exp.Concat, expressions=expressions) 2797 return primary 2798 2799 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2800 return exp.Literal.number(f"0.{self._prev.text}") 2801 2802 if self._match(TokenType.L_PAREN): 2803 comments = self._prev_comments 2804 query = self._parse_select() 2805 2806 if query: 2807 expressions = [query] 2808 else: 2809 expressions = self._parse_csv( 2810 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2811 ) 2812 2813 this = seq_get(expressions, 0) 2814 self._parse_query_modifiers(this) 2815 2816 if isinstance(this, exp.Subqueryable): 2817 this = self._parse_set_operations( 2818 self._parse_subquery(this=this, parse_alias=False) 2819 ) 2820 elif len(expressions) > 1: 2821 this = self.expression(exp.Tuple, expressions=expressions) 2822 else: 2823 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 2824 2825 self._match_r_paren() 2826 2827 if this and comments: 2828 this.comments = 
comments 2829 2830 return this 2831 2832 return None 2833 2834 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2835 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2836 2837 def _parse_function( 2838 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2839 ) -> t.Optional[exp.Expression]: 2840 if not self._curr: 2841 return None 2842 2843 token_type = self._curr.token_type 2844 2845 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2846 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2847 2848 if not self._next or self._next.token_type != TokenType.L_PAREN: 2849 if token_type in self.NO_PAREN_FUNCTIONS: 2850 self._advance() 2851 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2852 2853 return None 2854 2855 if token_type not in self.FUNC_TOKENS: 2856 return None 2857 2858 this = self._curr.text 2859 upper = this.upper() 2860 self._advance(2) 2861 2862 parser = self.FUNCTION_PARSERS.get(upper) 2863 2864 if parser: 2865 this = parser(self) 2866 else: 2867 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2868 2869 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2870 this = self.expression(subquery_predicate, this=self._parse_select()) 2871 self._match_r_paren() 2872 return this 2873 2874 if functions is None: 2875 functions = self.FUNCTIONS 2876 2877 function = functions.get(upper) 2878 args = self._parse_csv(self._parse_lambda) 2879 2880 if function: 2881 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2882 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
            # NOTE(review): this chunk begins inside _parse_function (its def is
            # above this view) — the code below finalizes a parsed function call.
            # Builders declared with two parameter lists may take an extra
            # parenthesized parameter group: FUNC(args)(params).
            if count_params(function) == 2:
                params = None
                if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                    params = self._parse_csv(self._parse_lambda)

                this = function(args, params)
            else:
                this = function(args)

            self.validate_expression(this, args)
        else:
            # Unknown function name: preserve it verbatim as an Anonymous call.
            this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A function call may be followed by an OVER clause.
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter: an identifier plus an optional type."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and its optional parameter list."""
        this = self._parse_id_var()

        # Qualified names (a.b.c) become nested Dot expressions.
        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a character-set introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap a national string literal (N'...') in an exp.National node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument ((x, y) -> expr), or fall back to a select/expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Treat the LHS of x = ... as a plain variable, not a column.
                left.replace(exp.Var(this=left.text("this")))

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema, if present."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            # "(SELECT ..." is a subquery, not a schema definition.
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list following a column name."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Plain identifier — nothing to wrap in a ColumnDef.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with one value or a wrapped list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse INLINE [LENGTH] <expr> into an InlineLengthColumnConstraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint keyword following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single (optionally named) column constraint, or a REFERENCES clause."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint; unnamed unless CONSTRAINT <name> is present."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE as a column constraint, or UNIQUE (col, ...) as a table constraint."""
        if
 not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] [options] into an exp.Reference."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token actions such as CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY either as a column constraint or with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals, recursively."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts to the dialect's array index base.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Support chained subscripts: a[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional slice suffix (lo : hi) following an expression."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or the IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); strict=False produces TryCast instead of Cast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants, including WITHIN GROUP ordering."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: NULL must also compare equal to NULL here.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                    ),
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) args leaves a trailing default value.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one JSON_OBJECT entry: [KEY] key [: | VALUE] value."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL-handling / uniqueness / RETURNING options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...) with dialect-dependent argument order and LN defaulting."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH (cols) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated LOCATE-style form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into an exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING with comma arguments or the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_term()
        else:
            # Only one operand: it is the trim target; no custom trim characters.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one entry of a WINDOW clause: name AS (window spec)."""
        return self._parse_window(self._parse_id_var(),
alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE-RESPECT NULLS / OVER suffixes for `this`."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias or parenthesized alias list; explicit=True requires AS."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into an exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder standing in for one."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and treat it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or a placeholder standing in for one."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when requested) into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that appeared just before the separator.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator token table over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list with `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a matched pair of parentheses."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full SELECT if present, otherwise a (possibly set-op) expression."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens, e.g. ISOLATION LEVEL words.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse DROP COLUMN as a Drop with kind COLUMN."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse DROP [IF EXISTS] PARTITION ... into a DropPartition node."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT action: CHECK, FOREIGN KEY or PRIMARY KEY."""
        this = None
        # The triggering token (CONSTRAINT / FOREIGN KEY / PRIMARY KEY) was just consumed.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action list of an ALTER TABLE: constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action list of an ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; unknown forms fall back to an opaque Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW trie; unknown forms become a bare Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment (name = value / name TO value); body continues past this chunk."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment after all — rewind.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
) 4070 4071 return self.expression( 4072 exp.SetItem, 4073 this=this, 4074 kind=kind, 4075 ) 4076 4077 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4078 self._match_text_seq("TRANSACTION") 4079 characteristics = self._parse_csv( 4080 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4081 ) 4082 return self.expression( 4083 exp.SetItem, 4084 expressions=characteristics, 4085 kind="TRANSACTION", 4086 **{"global": global_}, # type: ignore 4087 ) 4088 4089 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4090 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4091 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4092 4093 def _parse_set(self) -> exp.Expression: 4094 index = self._index 4095 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4096 4097 if self._curr: 4098 self._retreat(index) 4099 return self._parse_as_command(self._prev) 4100 4101 return set_ 4102 4103 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4104 for option in options: 4105 if self._match_text_seq(*option.split(" ")): 4106 return exp.Var(this=option) 4107 return None 4108 4109 def _parse_as_command(self, start: Token) -> exp.Command: 4110 while self._curr: 4111 self._advance() 4112 text = self._find_sql(start, self._prev) 4113 size = len(start.text) 4114 return exp.Command(this=text[:size], expression=text[size:]) 4115 4116 def _find_parser( 4117 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4118 ) -> t.Optional[t.Callable]: 4119 if not self._curr: 4120 return None 4121 4122 index = self._index 4123 this = [] 4124 while True: 4125 # The current token might be multiple words 4126 curr = self._curr.text.upper() 4127 key = curr.split(" ") 4128 this.append(curr) 4129 self._advance() 4130 result, trie = in_trie(trie, key) 4131 if result == 0: 4132 break 4133 if result == 2: 4134 subparser = parsers[" 
".join(this)] 4135 return subparser 4136 self._retreat(index) 4137 return None 4138 4139 def _match(self, token_type, advance=True): 4140 if not self._curr: 4141 return None 4142 4143 if self._curr.token_type == token_type: 4144 if advance: 4145 self._advance() 4146 return True 4147 4148 return None 4149 4150 def _match_set(self, types, advance=True): 4151 if not self._curr: 4152 return None 4153 4154 if self._curr.token_type in types: 4155 if advance: 4156 self._advance() 4157 return True 4158 4159 return None 4160 4161 def _match_pair(self, token_type_a, token_type_b, advance=True): 4162 if not self._curr or not self._next: 4163 return None 4164 4165 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4166 if advance: 4167 self._advance(2) 4168 return True 4169 4170 return None 4171 4172 def _match_l_paren(self, expression=None): 4173 if not self._match(TokenType.L_PAREN): 4174 self.raise_error("Expecting (") 4175 if expression and self._prev_comments: 4176 expression.comments = self._prev_comments 4177 4178 def _match_r_paren(self, expression=None): 4179 if not self._match(TokenType.R_PAREN): 4180 self.raise_error("Expecting )") 4181 if expression and self._prev_comments: 4182 expression.comments = self._prev_comments 4183 4184 def _match_texts(self, texts, advance=True): 4185 if self._curr and self._curr.text.upper() in texts: 4186 if advance: 4187 self._advance() 4188 return True 4189 return False 4190 4191 def _match_text_seq(self, *texts, advance=True): 4192 index = self._index 4193 for text in texts: 4194 if self._curr and self._curr.text.upper() == text: 4195 self._advance() 4196 else: 4197 self._retreat(index) 4198 return False 4199 4200 if not advance: 4201 self._retreat(index) 4202 4203 return True 4204 4205 def _replace_columns_with_dots(self, this): 4206 if isinstance(this, exp.Dot): 4207 exp.replace_children(this, self._replace_columns_with_dots) 4208 elif isinstance(this, exp.Column): 4209 exp.replace_children(this, 
self._replace_columns_with_dots) 4210 table = this.args.get("table") 4211 this = ( 4212 self.expression(exp.Dot, this=table, expression=this.this) 4213 if table 4214 else self.expression(exp.Var, this=this.name) 4215 ) 4216 elif isinstance(this, exp.Identifier): 4217 this = self.expression(exp.Var, this=this.name) 4218 return this 4219 4220 def _replace_lambda(self, node, lambda_variables): 4221 for column in node.find_all(exp.Column): 4222 if column.parts[0].name in lambda_variables: 4223 dot_or_id = column.to_dot() if column.table else column.this 4224 parent = column.parent 4225 4226 while isinstance(parent, exp.Dot): 4227 if not isinstance(parent.parent, exp.Dot): 4228 parent.replace(dot_or_id) 4229 break 4230 parent = parent.parent 4231 else: 4232 column.replace(dot_or_id) 4233 return node
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Function-name -> builder turning a parsed argument list into an AST node.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Data types that can themselves contain nested types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Quantified subquery predicates, e.g. = ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # All object kinds accepted by CREATE/DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that may be used as identifiers even though they are keywords.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed as table aliases (join keywords excluded to
    # avoid ambiguity with a following join clause).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Keyword tokens that may also appear as function names.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator-precedence tables: token -> expression class.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `x -> expr` and `x => expr` (kwarg form).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that bind to a column, e.g. `col::int`, `col->'path'`.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser entry point, used by parse_into().
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Statement-leading token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token -> builder receiving the consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators applied to an already-parsed left operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # CREATE-statement property keyword -> property parser.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> constraint parser.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> action parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that take no parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions with non-standard argument syntax needing dedicated parsers.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier clause name -> parser; keys match exp query-modifier args.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET-item scope keyword -> parser (keyed into _set_trie by the metaclass).
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect-specific SHOW parsers; empty in the base parser.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect toggles overridden by subclasses.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for what each option controls.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clear all per-parse state so the parser instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: split the token stream on semicolons and run
        # `parse_method` once per statement chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon must not create an empty final chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # NOTE(review): this method continues past the end of this chunk.
            if
self._index < len(self._tokens): 872 self.raise_error("Invalid expression / Unexpected token") 873 874 self.check_errors() 875 876 return expressions 877 878 def check_errors(self) -> None: 879 """ 880 Logs or raises any found errors, depending on the chosen error level setting. 881 """ 882 if self.error_level == ErrorLevel.WARN: 883 for error in self.errors: 884 logger.error(str(error)) 885 elif self.error_level == ErrorLevel.RAISE and self.errors: 886 raise ParseError( 887 concat_messages(self.errors, self.max_errors), 888 errors=merge_errors(self.errors), 889 ) 890 891 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 892 """ 893 Appends an error in the list of recorded errors or raises it, depending on the chosen 894 error level setting. 895 """ 896 token = token or self._curr or self._prev or Token.string("") 897 start = self._find_token(token) 898 end = start + len(token.text) 899 start_context = self.sql[max(start - self.error_message_context, 0) : start] 900 highlight = self.sql[start:end] 901 end_context = self.sql[end : end + self.error_message_context] 902 903 error = ParseError.new( 904 f"{message}. Line {token.line}, Col: {token.col}.\n" 905 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 906 description=message, 907 line=token.line, 908 col=token.col, 909 start_context=start_context, 910 highlight=highlight, 911 end_context=end_context, 912 ) 913 914 if self.error_level == ErrorLevel.IMMEDIATE: 915 raise error 916 917 self.errors.append(error) 918 919 def expression( 920 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 921 ) -> exp.Expression: 922 """ 923 Creates a new, validated Expression. 924 925 Args: 926 exp_class: the expression class to instantiate. 927 comments: an optional list of comments to attach to the expression. 928 kwargs: the arguments to set for the expression along with their respective values. 929 930 Returns: 931 The target expression. 
932 """ 933 instance = exp_class(**kwargs) 934 if self._prev_comments: 935 instance.comments = self._prev_comments 936 self._prev_comments = None 937 if comments: 938 instance.comments = comments 939 self.validate_expression(instance) 940 return instance 941 942 def validate_expression( 943 self, expression: exp.Expression, args: t.Optional[t.List] = None 944 ) -> None: 945 """ 946 Validates an already instantiated expression, making sure that all its mandatory arguments 947 are set. 948 949 Args: 950 expression: the expression to validate. 951 args: an optional list of items that was used to instantiate the expression, if it's a Func. 952 """ 953 if self.error_level == ErrorLevel.IGNORE: 954 return 955 956 for error_message in expression.error_messages(args): 957 self.raise_error(error_message) 958 959 def _find_sql(self, start: Token, end: Token) -> str: 960 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 961 962 def _find_token(self, token: Token) -> int: 963 line = 1 964 col = 1 965 index = 0 966 967 while line < token.line or col < token.col: 968 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 969 line += 1 970 col = 1 971 else: 972 col += 1 973 index += 1 974 975 return index 976 977 def _advance(self, times: int = 1) -> None: 978 self._index += times 979 self._curr = seq_get(self._tokens, self._index) 980 self._next = seq_get(self._tokens, self._index + 1) 981 if self._index > 0: 982 self._prev = self._tokens[self._index - 1] 983 self._prev_comments = self._prev.comments 984 else: 985 self._prev = None 986 self._prev_comments = None 987 988 def _retreat(self, index: int) -> None: 989 if index != self._index: 990 self._advance(index - self._index) 991 992 def _parse_command(self) -> exp.Expression: 993 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 994 995 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 996 start = self._prev 997 exists = 
self._parse_exists() if allow_exists else None 998 999 self._match(TokenType.ON) 1000 1001 kind = self._match_set(self.CREATABLES) and self._prev 1002 1003 if not kind: 1004 return self._parse_as_command(start) 1005 1006 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1007 this = self._parse_user_defined_function(kind=kind.token_type) 1008 elif kind.token_type == TokenType.TABLE: 1009 this = self._parse_table() 1010 elif kind.token_type == TokenType.COLUMN: 1011 this = self._parse_column() 1012 else: 1013 this = self._parse_id_var() 1014 1015 self._match(TokenType.IS) 1016 1017 return self.expression( 1018 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1019 ) 1020 1021 def _parse_statement(self) -> t.Optional[exp.Expression]: 1022 if self._curr is None: 1023 return None 1024 1025 if self._match_set(self.STATEMENT_PARSERS): 1026 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1027 1028 if self._match_set(Tokenizer.COMMANDS): 1029 return self._parse_command() 1030 1031 expression = self._parse_expression() 1032 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1033 1034 self._parse_query_modifiers(expression) 1035 return expression 1036 1037 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 1038 start = self._prev 1039 temporary = self._match(TokenType.TEMPORARY) 1040 materialized = self._match(TokenType.MATERIALIZED) 1041 kind = self._match_set(self.CREATABLES) and self._prev.text 1042 if not kind: 1043 if default_kind: 1044 kind = default_kind 1045 else: 1046 return self._parse_as_command(start) 1047 1048 return self.expression( 1049 exp.Drop, 1050 exists=self._parse_exists(), 1051 this=self._parse_table(schema=True), 1052 kind=kind, 1053 temporary=temporary, 1054 materialized=materialized, 1055 cascade=self._match(TokenType.CASCADE), 1056 constraints=self._match_text_seq("CONSTRAINTS"), 1057 ) 1058 1059 def 
_parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1060 return ( 1061 self._match(TokenType.IF) 1062 and (not not_ or self._match(TokenType.NOT)) 1063 and self._match(TokenType.EXISTS) 1064 ) 1065 1066 def _parse_create(self) -> t.Optional[exp.Expression]: 1067 start = self._prev 1068 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1069 TokenType.OR, TokenType.REPLACE 1070 ) 1071 unique = self._match(TokenType.UNIQUE) 1072 volatile = self._match(TokenType.VOLATILE) 1073 1074 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1075 self._match(TokenType.TABLE) 1076 1077 properties = None 1078 create_token = self._match_set(self.CREATABLES) and self._prev 1079 1080 if not create_token: 1081 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1082 create_token = self._match_set(self.CREATABLES) and self._prev 1083 1084 if not properties or not create_token: 1085 return self._parse_as_command(start) 1086 1087 exists = self._parse_exists(not_=True) 1088 this = None 1089 expression = None 1090 indexes = None 1091 no_schema_binding = None 1092 begin = None 1093 1094 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1095 this = self._parse_user_defined_function(kind=create_token.token_type) 1096 temp_properties = self._parse_properties() 1097 if properties and temp_properties: 1098 properties.expressions.extend(temp_properties.expressions) 1099 elif temp_properties: 1100 properties = temp_properties 1101 1102 self._match(TokenType.ALIAS) 1103 begin = self._match(TokenType.BEGIN) 1104 return_ = self._match_text_seq("RETURN") 1105 expression = self._parse_statement() 1106 1107 if return_: 1108 expression = self.expression(exp.Return, this=expression) 1109 elif create_token.token_type == TokenType.INDEX: 1110 this = self._parse_index() 1111 elif create_token.token_type in self.DB_CREATABLES: 1112 table_parts = self._parse_table_parts(schema=True) 1113 1114 # 
exp.Properties.Location.POST_NAME 1115 if self._match(TokenType.COMMA): 1116 temp_properties = self._parse_properties(before=True) 1117 if properties and temp_properties: 1118 properties.expressions.extend(temp_properties.expressions) 1119 elif temp_properties: 1120 properties = temp_properties 1121 1122 this = self._parse_schema(this=table_parts) 1123 1124 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1125 temp_properties = self._parse_properties() 1126 if properties and temp_properties: 1127 properties.expressions.extend(temp_properties.expressions) 1128 elif temp_properties: 1129 properties = temp_properties 1130 1131 self._match(TokenType.ALIAS) 1132 1133 # exp.Properties.Location.POST_ALIAS 1134 if not ( 1135 self._match(TokenType.SELECT, advance=False) 1136 or self._match(TokenType.WITH, advance=False) 1137 or self._match(TokenType.L_PAREN, advance=False) 1138 ): 1139 temp_properties = self._parse_properties() 1140 if properties and temp_properties: 1141 properties.expressions.extend(temp_properties.expressions) 1142 elif temp_properties: 1143 properties = temp_properties 1144 1145 expression = self._parse_ddl_select() 1146 1147 if create_token.token_type == TokenType.TABLE: 1148 # exp.Properties.Location.POST_EXPRESSION 1149 temp_properties = self._parse_properties() 1150 if properties and temp_properties: 1151 properties.expressions.extend(temp_properties.expressions) 1152 elif temp_properties: 1153 properties = temp_properties 1154 1155 indexes = [] 1156 while True: 1157 index = self._parse_create_table_index() 1158 1159 # exp.Properties.Location.POST_INDEX 1160 if self._match(TokenType.PARTITION_BY, advance=False): 1161 temp_properties = self._parse_properties() 1162 if properties and temp_properties: 1163 properties.expressions.extend(temp_properties.expressions) 1164 elif temp_properties: 1165 properties = temp_properties 1166 1167 if not index: 1168 break 1169 else: 1170 indexes.append(index) 1171 elif create_token.token_type == TokenType.VIEW: 
1172 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1173 no_schema_binding = True 1174 1175 return self.expression( 1176 exp.Create, 1177 this=this, 1178 kind=create_token.text, 1179 replace=replace, 1180 unique=unique, 1181 volatile=volatile, 1182 expression=expression, 1183 exists=exists, 1184 properties=properties, 1185 indexes=indexes, 1186 no_schema_binding=no_schema_binding, 1187 begin=begin, 1188 ) 1189 1190 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1191 self._match(TokenType.COMMA) 1192 1193 # parsers look to _prev for no/dual/default, so need to consume first 1194 self._match_text_seq("NO") 1195 self._match_text_seq("DUAL") 1196 self._match_text_seq("DEFAULT") 1197 1198 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1199 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1200 1201 return None 1202 1203 def _parse_property(self) -> t.Optional[exp.Expression]: 1204 if self._match_texts(self.PROPERTY_PARSERS): 1205 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1206 1207 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1208 return self._parse_character_set(default=True) 1209 1210 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1211 return self._parse_sortkey(compound=True) 1212 1213 if self._match_text_seq("SQL", "SECURITY"): 1214 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1215 1216 assignment = self._match_pair( 1217 TokenType.VAR, TokenType.EQ, advance=False 1218 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1219 1220 if assignment: 1221 key = self._parse_var_or_string() 1222 self._match(TokenType.EQ) 1223 return self.expression(exp.Property, this=key, value=self._parse_column()) 1224 1225 return None 1226 1227 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1228 self._match(TokenType.EQ) 1229 self._match(TokenType.ALIAS) 1230 return self.expression( 1231 
exp_class, 1232 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1233 ) 1234 1235 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1236 properties = [] 1237 1238 while True: 1239 if before: 1240 identified_property = self._parse_property_before() 1241 else: 1242 identified_property = self._parse_property() 1243 1244 if not identified_property: 1245 break 1246 for p in ensure_list(identified_property): 1247 properties.append(p) 1248 1249 if properties: 1250 return self.expression(exp.Properties, expressions=properties) 1251 1252 return None 1253 1254 def _parse_fallback(self, no=False) -> exp.Expression: 1255 self._match_text_seq("FALLBACK") 1256 return self.expression( 1257 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1258 ) 1259 1260 def _parse_with_property( 1261 self, 1262 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1263 self._match(TokenType.WITH) 1264 if self._match(TokenType.L_PAREN, advance=False): 1265 return self._parse_wrapped_csv(self._parse_property) 1266 1267 if self._match_text_seq("JOURNAL"): 1268 return self._parse_withjournaltable() 1269 1270 if self._match_text_seq("DATA"): 1271 return self._parse_withdata(no=False) 1272 elif self._match_text_seq("NO", "DATA"): 1273 return self._parse_withdata(no=True) 1274 1275 if not self._next: 1276 return None 1277 1278 return self._parse_withisolatedloading() 1279 1280 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1281 def _parse_definer(self) -> t.Optional[exp.Expression]: 1282 self._match(TokenType.EQ) 1283 1284 user = self._parse_id_var() 1285 self._match(TokenType.PARAMETER) 1286 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1287 1288 if not user or not host: 1289 return None 1290 1291 return exp.DefinerProperty(this=f"{user}@{host}") 1292 1293 def _parse_withjournaltable(self) -> exp.Expression: 1294 self._match(TokenType.TABLE) 1295 
self._match(TokenType.EQ) 1296 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1297 1298 def _parse_log(self, no=False) -> exp.Expression: 1299 self._match_text_seq("LOG") 1300 return self.expression(exp.LogProperty, no=no) 1301 1302 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1303 before = self._match_text_seq("BEFORE") 1304 self._match_text_seq("JOURNAL") 1305 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1306 1307 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1308 self._match_text_seq("NOT") 1309 self._match_text_seq("LOCAL") 1310 self._match_text_seq("AFTER", "JOURNAL") 1311 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1312 1313 def _parse_checksum(self) -> exp.Expression: 1314 self._match_text_seq("CHECKSUM") 1315 self._match(TokenType.EQ) 1316 1317 on = None 1318 if self._match(TokenType.ON): 1319 on = True 1320 elif self._match_text_seq("OFF"): 1321 on = False 1322 default = self._match(TokenType.DEFAULT) 1323 1324 return self.expression( 1325 exp.ChecksumProperty, 1326 on=on, 1327 default=default, 1328 ) 1329 1330 def _parse_freespace(self) -> exp.Expression: 1331 self._match_text_seq("FREESPACE") 1332 self._match(TokenType.EQ) 1333 return self.expression( 1334 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1335 ) 1336 1337 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1338 self._match_text_seq("MERGEBLOCKRATIO") 1339 if self._match(TokenType.EQ): 1340 return self.expression( 1341 exp.MergeBlockRatioProperty, 1342 this=self._parse_number(), 1343 percent=self._match(TokenType.PERCENT), 1344 ) 1345 else: 1346 return self.expression( 1347 exp.MergeBlockRatioProperty, 1348 no=no, 1349 default=default, 1350 ) 1351 1352 def _parse_datablocksize(self, default=None) -> exp.Expression: 1353 if default: 1354 
self._match_text_seq("DATABLOCKSIZE") 1355 return self.expression(exp.DataBlocksizeProperty, default=True) 1356 elif self._match_texts(("MIN", "MINIMUM")): 1357 self._match_text_seq("DATABLOCKSIZE") 1358 return self.expression(exp.DataBlocksizeProperty, min=True) 1359 elif self._match_texts(("MAX", "MAXIMUM")): 1360 self._match_text_seq("DATABLOCKSIZE") 1361 return self.expression(exp.DataBlocksizeProperty, min=False) 1362 1363 self._match_text_seq("DATABLOCKSIZE") 1364 self._match(TokenType.EQ) 1365 size = self._parse_number() 1366 units = None 1367 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1368 units = self._prev.text 1369 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1370 1371 def _parse_blockcompression(self) -> exp.Expression: 1372 self._match_text_seq("BLOCKCOMPRESSION") 1373 self._match(TokenType.EQ) 1374 always = self._match_text_seq("ALWAYS") 1375 manual = self._match_text_seq("MANUAL") 1376 never = self._match_text_seq("NEVER") 1377 default = self._match_text_seq("DEFAULT") 1378 autotemp = None 1379 if self._match_text_seq("AUTOTEMP"): 1380 autotemp = self._parse_schema() 1381 1382 return self.expression( 1383 exp.BlockCompressionProperty, 1384 always=always, 1385 manual=manual, 1386 never=never, 1387 default=default, 1388 autotemp=autotemp, 1389 ) 1390 1391 def _parse_withisolatedloading(self) -> exp.Expression: 1392 no = self._match_text_seq("NO") 1393 concurrent = self._match_text_seq("CONCURRENT") 1394 self._match_text_seq("ISOLATED", "LOADING") 1395 for_all = self._match_text_seq("FOR", "ALL") 1396 for_insert = self._match_text_seq("FOR", "INSERT") 1397 for_none = self._match_text_seq("FOR", "NONE") 1398 return self.expression( 1399 exp.IsolatedLoadingProperty, 1400 no=no, 1401 concurrent=concurrent, 1402 for_all=for_all, 1403 for_insert=for_insert, 1404 for_none=for_none, 1405 ) 1406 1407 def _parse_locking(self) -> exp.Expression: 1408 if self._match(TokenType.TABLE): 1409 kind = "TABLE" 1410 elif 
self._match(TokenType.VIEW): 1411 kind = "VIEW" 1412 elif self._match(TokenType.ROW): 1413 kind = "ROW" 1414 elif self._match_text_seq("DATABASE"): 1415 kind = "DATABASE" 1416 else: 1417 kind = None 1418 1419 if kind in ("DATABASE", "TABLE", "VIEW"): 1420 this = self._parse_table_parts() 1421 else: 1422 this = None 1423 1424 if self._match(TokenType.FOR): 1425 for_or_in = "FOR" 1426 elif self._match(TokenType.IN): 1427 for_or_in = "IN" 1428 else: 1429 for_or_in = None 1430 1431 if self._match_text_seq("ACCESS"): 1432 lock_type = "ACCESS" 1433 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1434 lock_type = "EXCLUSIVE" 1435 elif self._match_text_seq("SHARE"): 1436 lock_type = "SHARE" 1437 elif self._match_text_seq("READ"): 1438 lock_type = "READ" 1439 elif self._match_text_seq("WRITE"): 1440 lock_type = "WRITE" 1441 elif self._match_text_seq("CHECKSUM"): 1442 lock_type = "CHECKSUM" 1443 else: 1444 lock_type = None 1445 1446 override = self._match_text_seq("OVERRIDE") 1447 1448 return self.expression( 1449 exp.LockingProperty, 1450 this=this, 1451 kind=kind, 1452 for_or_in=for_or_in, 1453 lock_type=lock_type, 1454 override=override, 1455 ) 1456 1457 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1458 if self._match(TokenType.PARTITION_BY): 1459 return self._parse_csv(self._parse_conjunction) 1460 return [] 1461 1462 def _parse_partitioned_by(self) -> exp.Expression: 1463 self._match(TokenType.EQ) 1464 return self.expression( 1465 exp.PartitionedByProperty, 1466 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1467 ) 1468 1469 def _parse_withdata(self, no=False) -> exp.Expression: 1470 if self._match_text_seq("AND", "STATISTICS"): 1471 statistics = True 1472 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1473 statistics = False 1474 else: 1475 statistics = None 1476 1477 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1478 1479 def _parse_noprimaryindex(self) -> exp.Expression: 1480 
self._match_text_seq("PRIMARY", "INDEX") 1481 return exp.NoPrimaryIndexProperty() 1482 1483 def _parse_oncommit(self) -> exp.Expression: 1484 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1485 return exp.OnCommitProperty() 1486 1487 def _parse_distkey(self) -> exp.Expression: 1488 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1489 1490 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1491 table = self._parse_table(schema=True) 1492 options = [] 1493 while self._match_texts(("INCLUDING", "EXCLUDING")): 1494 this = self._prev.text.upper() 1495 id_var = self._parse_id_var() 1496 1497 if not id_var: 1498 return None 1499 1500 options.append( 1501 self.expression( 1502 exp.Property, 1503 this=this, 1504 value=exp.Var(this=id_var.this.upper()), 1505 ) 1506 ) 1507 return self.expression(exp.LikeProperty, this=table, expressions=options) 1508 1509 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1510 return self.expression( 1511 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1512 ) 1513 1514 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1515 self._match(TokenType.EQ) 1516 return self.expression( 1517 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1518 ) 1519 1520 def _parse_returns(self) -> exp.Expression: 1521 value: t.Optional[exp.Expression] 1522 is_table = self._match(TokenType.TABLE) 1523 1524 if is_table: 1525 if self._match(TokenType.LT): 1526 value = self.expression( 1527 exp.Schema, 1528 this="TABLE", 1529 expressions=self._parse_csv(self._parse_struct_kwargs), 1530 ) 1531 if not self._match(TokenType.GT): 1532 self.raise_error("Expecting >") 1533 else: 1534 value = self._parse_schema(exp.Var(this="TABLE")) 1535 else: 1536 value = self._parse_types() 1537 1538 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1539 1540 def _parse_temporary(self, global_=False) -> 
exp.Expression: 1541 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1542 return self.expression(exp.TemporaryProperty, global_=global_) 1543 1544 def _parse_describe(self) -> exp.Expression: 1545 kind = self._match_set(self.CREATABLES) and self._prev.text 1546 this = self._parse_table() 1547 1548 return self.expression(exp.Describe, this=this, kind=kind) 1549 1550 def _parse_insert(self) -> exp.Expression: 1551 overwrite = self._match(TokenType.OVERWRITE) 1552 local = self._match(TokenType.LOCAL) 1553 alternative = None 1554 1555 if self._match_text_seq("DIRECTORY"): 1556 this: t.Optional[exp.Expression] = self.expression( 1557 exp.Directory, 1558 this=self._parse_var_or_string(), 1559 local=local, 1560 row_format=self._parse_row_format(match_row=True), 1561 ) 1562 else: 1563 if self._match(TokenType.OR): 1564 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1565 1566 self._match(TokenType.INTO) 1567 self._match(TokenType.TABLE) 1568 this = self._parse_table(schema=True) 1569 1570 return self.expression( 1571 exp.Insert, 1572 this=this, 1573 exists=self._parse_exists(), 1574 partition=self._parse_partition(), 1575 expression=self._parse_ddl_select(), 1576 returning=self._parse_returning(), 1577 overwrite=overwrite, 1578 alternative=alternative, 1579 ) 1580 1581 def _parse_returning(self) -> t.Optional[exp.Expression]: 1582 if not self._match(TokenType.RETURNING): 1583 return None 1584 1585 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1586 1587 def _parse_row(self) -> t.Optional[exp.Expression]: 1588 if not self._match(TokenType.FORMAT): 1589 return None 1590 return self._parse_row_format() 1591 1592 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1593 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1594 return None 1595 1596 if self._match_text_seq("SERDE"): 1597 return self.expression(exp.RowFormatSerdeProperty, 
this=self._parse_string()) 1598 1599 self._match_text_seq("DELIMITED") 1600 1601 kwargs = {} 1602 1603 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1604 kwargs["fields"] = self._parse_string() 1605 if self._match_text_seq("ESCAPED", "BY"): 1606 kwargs["escaped"] = self._parse_string() 1607 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1608 kwargs["collection_items"] = self._parse_string() 1609 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1610 kwargs["map_keys"] = self._parse_string() 1611 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1612 kwargs["lines"] = self._parse_string() 1613 if self._match_text_seq("NULL", "DEFINED", "AS"): 1614 kwargs["null"] = self._parse_string() 1615 1616 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1617 1618 def _parse_load_data(self) -> exp.Expression: 1619 local = self._match(TokenType.LOCAL) 1620 self._match_text_seq("INPATH") 1621 inpath = self._parse_string() 1622 overwrite = self._match(TokenType.OVERWRITE) 1623 self._match_pair(TokenType.INTO, TokenType.TABLE) 1624 1625 return self.expression( 1626 exp.LoadData, 1627 this=self._parse_table(schema=True), 1628 local=local, 1629 overwrite=overwrite, 1630 inpath=inpath, 1631 partition=self._parse_partition(), 1632 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1633 serde=self._match_text_seq("SERDE") and self._parse_string(), 1634 ) 1635 1636 def _parse_delete(self) -> exp.Expression: 1637 self._match(TokenType.FROM) 1638 1639 return self.expression( 1640 exp.Delete, 1641 this=self._parse_table(schema=True), 1642 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1643 where=self._parse_where(), 1644 returning=self._parse_returning(), 1645 ) 1646 1647 def _parse_update(self) -> exp.Expression: 1648 return self.expression( 1649 exp.Update, 1650 **{ # type: ignore 1651 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 
1652 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1653 "from": self._parse_from(), 1654 "where": self._parse_where(), 1655 "returning": self._parse_returning(), 1656 }, 1657 ) 1658 1659 def _parse_uncache(self) -> exp.Expression: 1660 if not self._match(TokenType.TABLE): 1661 self.raise_error("Expecting TABLE after UNCACHE") 1662 1663 return self.expression( 1664 exp.Uncache, 1665 exists=self._parse_exists(), 1666 this=self._parse_table(schema=True), 1667 ) 1668 1669 def _parse_cache(self) -> exp.Expression: 1670 lazy = self._match(TokenType.LAZY) 1671 self._match(TokenType.TABLE) 1672 table = self._parse_table(schema=True) 1673 options = [] 1674 1675 if self._match(TokenType.OPTIONS): 1676 self._match_l_paren() 1677 k = self._parse_string() 1678 self._match(TokenType.EQ) 1679 v = self._parse_string() 1680 options = [k, v] 1681 self._match_r_paren() 1682 1683 self._match(TokenType.ALIAS) 1684 return self.expression( 1685 exp.Cache, 1686 this=table, 1687 lazy=lazy, 1688 options=options, 1689 expression=self._parse_select(nested=True), 1690 ) 1691 1692 def _parse_partition(self) -> t.Optional[exp.Expression]: 1693 if not self._match(TokenType.PARTITION): 1694 return None 1695 1696 return self.expression( 1697 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1698 ) 1699 1700 def _parse_value(self) -> exp.Expression: 1701 if self._match(TokenType.L_PAREN): 1702 expressions = self._parse_csv(self._parse_conjunction) 1703 self._match_r_paren() 1704 return self.expression(exp.Tuple, expressions=expressions) 1705 1706 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1707 # Source: https://prestodb.io/docs/current/sql/values.html 1708 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1709 1710 def _parse_select( 1711 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1712 ) -> t.Optional[exp.Expression]: 1713 cte = self._parse_with() 1714 if cte: 1715 this = self._parse_statement() 1716 1717 if not this: 1718 self.raise_error("Failed to parse any statement following CTE") 1719 return cte 1720 1721 if "with" in this.arg_types: 1722 this.set("with", cte) 1723 else: 1724 self.raise_error(f"{this.key} does not support CTE") 1725 this = cte 1726 elif self._match(TokenType.SELECT): 1727 comments = self._prev_comments 1728 1729 kind = ( 1730 self._match(TokenType.ALIAS) 1731 and self._match_texts(("STRUCT", "VALUE")) 1732 and self._prev.text 1733 ) 1734 hint = self._parse_hint() 1735 all_ = self._match(TokenType.ALL) 1736 distinct = self._match(TokenType.DISTINCT) 1737 1738 if distinct: 1739 distinct = self.expression( 1740 exp.Distinct, 1741 on=self._parse_value() if self._match(TokenType.ON) else None, 1742 ) 1743 1744 if all_ and distinct: 1745 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1746 1747 limit = self._parse_limit(top=True) 1748 expressions = self._parse_csv(self._parse_expression) 1749 1750 this = self.expression( 1751 exp.Select, 1752 kind=kind, 1753 hint=hint, 1754 distinct=distinct, 1755 expressions=expressions, 1756 limit=limit, 1757 ) 1758 this.comments = comments 1759 1760 into = self._parse_into() 1761 if into: 1762 this.set("into", into) 1763 1764 from_ = self._parse_from() 1765 if from_: 1766 this.set("from", from_) 1767 1768 self._parse_query_modifiers(this) 1769 elif (table or nested) and self._match(TokenType.L_PAREN): 1770 this = self._parse_table() if table else self._parse_select(nested=True) 1771 self._parse_query_modifiers(this) 1772 this = self._parse_set_operations(this) 1773 self._match_r_paren() 1774 1775 # early return 
so that subquery unions aren't parsed again 1776 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1777 # Union ALL should be a property of the top select node, not the subquery 1778 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1779 elif self._match(TokenType.VALUES): 1780 this = self.expression( 1781 exp.Values, 1782 expressions=self._parse_csv(self._parse_value), 1783 alias=self._parse_table_alias(), 1784 ) 1785 else: 1786 this = None 1787 1788 return self._parse_set_operations(this) 1789 1790 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1791 if not skip_with_token and not self._match(TokenType.WITH): 1792 return None 1793 1794 recursive = self._match(TokenType.RECURSIVE) 1795 1796 expressions = [] 1797 while True: 1798 expressions.append(self._parse_cte()) 1799 1800 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1801 break 1802 else: 1803 self._match(TokenType.WITH) 1804 1805 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1806 1807 def _parse_cte(self) -> exp.Expression: 1808 alias = self._parse_table_alias() 1809 if not alias or not alias.this: 1810 self.raise_error("Expected CTE to have alias") 1811 1812 self._match(TokenType.ALIAS) 1813 1814 return self.expression( 1815 exp.CTE, 1816 this=self._parse_wrapped(self._parse_statement), 1817 alias=alias, 1818 ) 1819 1820 def _parse_table_alias( 1821 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1822 ) -> t.Optional[exp.Expression]: 1823 any_token = self._match(TokenType.ALIAS) 1824 alias = ( 1825 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1826 or self._parse_string_as_identifier() 1827 ) 1828 1829 index = self._index 1830 if self._match(TokenType.L_PAREN): 1831 columns = self._parse_csv(self._parse_function_parameter) 1832 self._match_r_paren() if columns else self._retreat(index) 1833 else: 1834 columns = None 1835 1836 if not alias and 
not columns: 1837 return None 1838 1839 return self.expression(exp.TableAlias, this=alias, columns=columns) 1840 1841 def _parse_subquery( 1842 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1843 ) -> exp.Expression: 1844 return self.expression( 1845 exp.Subquery, 1846 this=this, 1847 pivots=self._parse_pivots(), 1848 alias=self._parse_table_alias() if parse_alias else None, 1849 ) 1850 1851 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1852 if not isinstance(this, self.MODIFIABLES): 1853 return 1854 1855 table = isinstance(this, exp.Table) 1856 1857 while True: 1858 lateral = self._parse_lateral() 1859 join = self._parse_join() 1860 comma = None if table else self._match(TokenType.COMMA) 1861 if lateral: 1862 this.append("laterals", lateral) 1863 if join: 1864 this.append("joins", join) 1865 if comma: 1866 this.args["from"].append("expressions", self._parse_table()) 1867 if not (lateral or join or comma): 1868 break 1869 1870 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1871 expression = parser(self) 1872 1873 if expression: 1874 this.set(key, expression) 1875 1876 def _parse_hint(self) -> t.Optional[exp.Expression]: 1877 if self._match(TokenType.HINT): 1878 hints = self._parse_csv(self._parse_function) 1879 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1880 self.raise_error("Expected */ after HINT") 1881 return self.expression(exp.Hint, expressions=hints) 1882 1883 return None 1884 1885 def _parse_into(self) -> t.Optional[exp.Expression]: 1886 if not self._match(TokenType.INTO): 1887 return None 1888 1889 temp = self._match(TokenType.TEMPORARY) 1890 unlogged = self._match(TokenType.UNLOGGED) 1891 self._match(TokenType.TABLE) 1892 1893 return self.expression( 1894 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1895 ) 1896 1897 def _parse_from(self) -> t.Optional[exp.Expression]: 1898 if not self._match(TokenType.FROM): 1899 return None 1900 1901 return 
self.expression( 1902 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1903 ) 1904 1905 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1906 if not self._match(TokenType.MATCH_RECOGNIZE): 1907 return None 1908 self._match_l_paren() 1909 1910 partition = self._parse_partition_by() 1911 order = self._parse_order() 1912 measures = ( 1913 self._parse_alias(self._parse_conjunction()) 1914 if self._match_text_seq("MEASURES") 1915 else None 1916 ) 1917 1918 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1919 rows = exp.Var(this="ONE ROW PER MATCH") 1920 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1921 text = "ALL ROWS PER MATCH" 1922 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1923 text += f" SHOW EMPTY MATCHES" 1924 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1925 text += f" OMIT EMPTY MATCHES" 1926 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1927 text += f" WITH UNMATCHED ROWS" 1928 rows = exp.Var(this=text) 1929 else: 1930 rows = None 1931 1932 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1933 text = "AFTER MATCH SKIP" 1934 if self._match_text_seq("PAST", "LAST", "ROW"): 1935 text += f" PAST LAST ROW" 1936 elif self._match_text_seq("TO", "NEXT", "ROW"): 1937 text += f" TO NEXT ROW" 1938 elif self._match_text_seq("TO", "FIRST"): 1939 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1940 elif self._match_text_seq("TO", "LAST"): 1941 text += f" TO LAST {self._advance_any().text}" # type: ignore 1942 after = exp.Var(this=text) 1943 else: 1944 after = None 1945 1946 if self._match_text_seq("PATTERN"): 1947 self._match_l_paren() 1948 1949 if not self._curr: 1950 self.raise_error("Expecting )", self._curr) 1951 1952 paren = 1 1953 start = self._curr 1954 1955 while self._curr and paren > 0: 1956 if self._curr.token_type == TokenType.L_PAREN: 1957 paren += 1 1958 if self._curr.token_type == TokenType.R_PAREN: 1959 paren -= 1 1960 end = self._prev 1961 
self._advance() 1962 if paren > 0: 1963 self.raise_error("Expecting )", self._curr) 1964 pattern = exp.Var(this=self._find_sql(start, end)) 1965 else: 1966 pattern = None 1967 1968 define = ( 1969 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1970 ) 1971 self._match_r_paren() 1972 1973 return self.expression( 1974 exp.MatchRecognize, 1975 partition_by=partition, 1976 order=order, 1977 measures=measures, 1978 rows=rows, 1979 after=after, 1980 pattern=pattern, 1981 define=define, 1982 ) 1983 1984 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1985 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1986 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1987 1988 if outer_apply or cross_apply: 1989 this = self._parse_select(table=True) 1990 view = None 1991 outer = not cross_apply 1992 elif self._match(TokenType.LATERAL): 1993 this = self._parse_select(table=True) 1994 view = self._match(TokenType.VIEW) 1995 outer = self._match(TokenType.OUTER) 1996 else: 1997 return None 1998 1999 if not this: 2000 this = self._parse_function() or self._parse_id_var(any_token=False) 2001 while self._match(TokenType.DOT): 2002 this = exp.Dot( 2003 this=this, 2004 expression=self._parse_function() or self._parse_id_var(any_token=False), 2005 ) 2006 2007 table_alias: t.Optional[exp.Expression] 2008 2009 if view: 2010 table = self._parse_id_var(any_token=False) 2011 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2012 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2013 else: 2014 table_alias = self._parse_table_alias() 2015 2016 expression = self.expression( 2017 exp.Lateral, 2018 this=this, 2019 view=view, 2020 outer=outer, 2021 alias=table_alias, 2022 ) 2023 2024 if outer_apply or cross_apply: 2025 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 2026 2027 return expression 2028 2029 def 
_parse_join_side_and_kind( 2030 self, 2031 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2032 return ( 2033 self._match(TokenType.NATURAL) and self._prev, 2034 self._match_set(self.JOIN_SIDES) and self._prev, 2035 self._match_set(self.JOIN_KINDS) and self._prev, 2036 ) 2037 2038 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2039 natural, side, kind = self._parse_join_side_and_kind() 2040 2041 if not skip_join_token and not self._match(TokenType.JOIN): 2042 return None 2043 2044 kwargs: t.Dict[ 2045 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2046 ] = {"this": self._parse_table()} 2047 2048 if natural: 2049 kwargs["natural"] = True 2050 if side: 2051 kwargs["side"] = side.text 2052 if kind: 2053 kwargs["kind"] = kind.text 2054 2055 if self._match(TokenType.ON): 2056 kwargs["on"] = self._parse_conjunction() 2057 elif self._match(TokenType.USING): 2058 kwargs["using"] = self._parse_wrapped_id_vars() 2059 2060 return self.expression(exp.Join, **kwargs) # type: ignore 2061 2062 def _parse_index(self) -> exp.Expression: 2063 index = self._parse_id_var() 2064 self._match(TokenType.ON) 2065 self._match(TokenType.TABLE) # hive 2066 2067 return self.expression( 2068 exp.Index, 2069 this=index, 2070 table=self.expression(exp.Table, this=self._parse_id_var()), 2071 columns=self._parse_expression(), 2072 ) 2073 2074 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2075 unique = self._match(TokenType.UNIQUE) 2076 primary = self._match_text_seq("PRIMARY") 2077 amp = self._match_text_seq("AMP") 2078 if not self._match(TokenType.INDEX): 2079 return None 2080 index = self._parse_id_var() 2081 columns = None 2082 if self._match(TokenType.L_PAREN, advance=False): 2083 columns = self._parse_wrapped_csv(self._parse_column) 2084 return self.expression( 2085 exp.Index, 2086 this=index, 2087 columns=columns, 2088 unique=unique, 2089 primary=primary, 2090 amp=amp, 2091 ) 
2092 2093 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2094 catalog = None 2095 db = None 2096 2097 table = ( 2098 (not schema and self._parse_function()) 2099 or self._parse_id_var(any_token=False) 2100 or self._parse_string_as_identifier() 2101 ) 2102 2103 while self._match(TokenType.DOT): 2104 if catalog: 2105 # This allows nesting the table in arbitrarily many dot expressions if needed 2106 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2107 else: 2108 catalog = db 2109 db = table 2110 table = self._parse_id_var() 2111 2112 if not table: 2113 self.raise_error(f"Expected table name but got {self._curr}") 2114 2115 return self.expression( 2116 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2117 ) 2118 2119 def _parse_table( 2120 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2121 ) -> t.Optional[exp.Expression]: 2122 lateral = self._parse_lateral() 2123 2124 if lateral: 2125 return lateral 2126 2127 unnest = self._parse_unnest() 2128 2129 if unnest: 2130 return unnest 2131 2132 values = self._parse_derived_table_values() 2133 2134 if values: 2135 return values 2136 2137 subquery = self._parse_select(table=True) 2138 2139 if subquery: 2140 if not subquery.args.get("pivots"): 2141 subquery.set("pivots", self._parse_pivots()) 2142 return subquery 2143 2144 this = self._parse_table_parts(schema=schema) 2145 2146 if schema: 2147 return self._parse_schema(this=this) 2148 2149 if self.alias_post_tablesample: 2150 table_sample = self._parse_table_sample() 2151 2152 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2153 2154 if alias: 2155 this.set("alias", alias) 2156 2157 if not this.args.get("pivots"): 2158 this.set("pivots", self._parse_pivots()) 2159 2160 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2161 this.set( 2162 "hints", 2163 self._parse_csv(lambda: self._parse_function() or 
self._parse_var(any_token=True)), 2164 ) 2165 self._match_r_paren() 2166 2167 if not self.alias_post_tablesample: 2168 table_sample = self._parse_table_sample() 2169 2170 if table_sample: 2171 table_sample.set("this", this) 2172 this = table_sample 2173 2174 return this 2175 2176 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2177 if not self._match(TokenType.UNNEST): 2178 return None 2179 2180 expressions = self._parse_wrapped_csv(self._parse_column) 2181 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2182 alias = self._parse_table_alias() 2183 2184 if alias and self.unnest_column_only: 2185 if alias.args.get("columns"): 2186 self.raise_error("Unexpected extra column alias in unnest.") 2187 alias.set("columns", [alias.this]) 2188 alias.set("this", None) 2189 2190 offset = None 2191 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2192 self._match(TokenType.ALIAS) 2193 offset = self._parse_conjunction() 2194 2195 return self.expression( 2196 exp.Unnest, 2197 expressions=expressions, 2198 ordinality=ordinality, 2199 alias=alias, 2200 offset=offset, 2201 ) 2202 2203 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2204 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2205 if not is_derived and not self._match(TokenType.VALUES): 2206 return None 2207 2208 expressions = self._parse_csv(self._parse_value) 2209 2210 if is_derived: 2211 self._match_r_paren() 2212 2213 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2214 2215 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2216 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2217 as_modifier and self._match_text_seq("USING", "SAMPLE") 2218 ): 2219 return None 2220 2221 bucket_numerator = None 2222 bucket_denominator = None 2223 bucket_field = None 2224 percent = None 2225 rows = None 2226 size = None 2227 seed = None 2228 2229 kind = 
"TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2230 method = self._parse_var(tokens=(TokenType.ROW,)) 2231 2232 self._match(TokenType.L_PAREN) 2233 2234 num = self._parse_number() 2235 2236 if self._match(TokenType.BUCKET): 2237 bucket_numerator = self._parse_number() 2238 self._match(TokenType.OUT_OF) 2239 bucket_denominator = bucket_denominator = self._parse_number() 2240 self._match(TokenType.ON) 2241 bucket_field = self._parse_field() 2242 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2243 percent = num 2244 elif self._match(TokenType.ROWS): 2245 rows = num 2246 else: 2247 size = num 2248 2249 self._match(TokenType.R_PAREN) 2250 2251 if self._match(TokenType.L_PAREN): 2252 method = self._parse_var() 2253 seed = self._match(TokenType.COMMA) and self._parse_number() 2254 self._match_r_paren() 2255 elif self._match_texts(("SEED", "REPEATABLE")): 2256 seed = self._parse_wrapped(self._parse_number) 2257 2258 return self.expression( 2259 exp.TableSample, 2260 method=method, 2261 bucket_numerator=bucket_numerator, 2262 bucket_denominator=bucket_denominator, 2263 bucket_field=bucket_field, 2264 percent=percent, 2265 rows=rows, 2266 size=size, 2267 seed=seed, 2268 kind=kind, 2269 ) 2270 2271 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2272 return list(iter(self._parse_pivot, None)) 2273 2274 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2275 index = self._index 2276 2277 if self._match(TokenType.PIVOT): 2278 unpivot = False 2279 elif self._match(TokenType.UNPIVOT): 2280 unpivot = True 2281 else: 2282 return None 2283 2284 expressions = [] 2285 field = None 2286 2287 if not self._match(TokenType.L_PAREN): 2288 self._retreat(index) 2289 return None 2290 2291 if unpivot: 2292 expressions = self._parse_csv(self._parse_column) 2293 else: 2294 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2295 2296 if not self._match(TokenType.FOR): 2297 self.raise_error("Expecting 
FOR") 2298 2299 value = self._parse_column() 2300 2301 if not self._match(TokenType.IN): 2302 self.raise_error("Expecting IN") 2303 2304 field = self._parse_in(value) 2305 2306 self._match_r_paren() 2307 2308 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2309 2310 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2311 pivot.set("alias", self._parse_table_alias()) 2312 2313 return pivot 2314 2315 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2316 if not skip_where_token and not self._match(TokenType.WHERE): 2317 return None 2318 2319 return self.expression( 2320 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2321 ) 2322 2323 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2324 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2325 return None 2326 2327 elements = defaultdict(list) 2328 2329 while True: 2330 expressions = self._parse_csv(self._parse_conjunction) 2331 if expressions: 2332 elements["expressions"].extend(expressions) 2333 2334 grouping_sets = self._parse_grouping_sets() 2335 if grouping_sets: 2336 elements["grouping_sets"].extend(grouping_sets) 2337 2338 rollup = None 2339 cube = None 2340 2341 with_ = self._match(TokenType.WITH) 2342 if self._match(TokenType.ROLLUP): 2343 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2344 elements["rollup"].extend(ensure_list(rollup)) 2345 2346 if self._match(TokenType.CUBE): 2347 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2348 elements["cube"].extend(ensure_list(cube)) 2349 2350 if not (expressions or grouping_sets or rollup or cube): 2351 break 2352 2353 return self.expression(exp.Group, **elements) # type: ignore 2354 2355 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2356 if not self._match(TokenType.GROUPING_SETS): 2357 return None 2358 2359 return 
self._parse_wrapped_csv(self._parse_grouping_set) 2360 2361 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2362 if self._match(TokenType.L_PAREN): 2363 grouping_set = self._parse_csv(self._parse_column) 2364 self._match_r_paren() 2365 return self.expression(exp.Tuple, expressions=grouping_set) 2366 2367 return self._parse_column() 2368 2369 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2370 if not skip_having_token and not self._match(TokenType.HAVING): 2371 return None 2372 return self.expression(exp.Having, this=self._parse_conjunction()) 2373 2374 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2375 if not self._match(TokenType.QUALIFY): 2376 return None 2377 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2378 2379 def _parse_order( 2380 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2381 ) -> t.Optional[exp.Expression]: 2382 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2383 return this 2384 2385 return self.expression( 2386 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2387 ) 2388 2389 def _parse_sort( 2390 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2391 ) -> t.Optional[exp.Expression]: 2392 if not self._match(token_type): 2393 return None 2394 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2395 2396 def _parse_ordered(self) -> exp.Expression: 2397 this = self._parse_conjunction() 2398 self._match(TokenType.ASC) 2399 is_desc = self._match(TokenType.DESC) 2400 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2401 is_nulls_last = self._match(TokenType.NULLS_LAST) 2402 desc = is_desc or False 2403 asc = not desc 2404 nulls_first = is_nulls_first or False 2405 explicitly_null_ordered = is_nulls_first or is_nulls_last 2406 if ( 2407 not explicitly_null_ordered 2408 and ( 2409 (asc and self.null_ordering == "nulls_are_small") 2410 or (desc and 
self.null_ordering != "nulls_are_small") 2411 ) 2412 and self.null_ordering != "nulls_are_last" 2413 ): 2414 nulls_first = True 2415 2416 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2417 2418 def _parse_limit( 2419 self, this: t.Optional[exp.Expression] = None, top: bool = False 2420 ) -> t.Optional[exp.Expression]: 2421 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2422 limit_paren = self._match(TokenType.L_PAREN) 2423 limit_exp = self.expression( 2424 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2425 ) 2426 2427 if limit_paren: 2428 self._match_r_paren() 2429 2430 return limit_exp 2431 2432 if self._match(TokenType.FETCH): 2433 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2434 direction = self._prev.text if direction else "FIRST" 2435 count = self._parse_number() 2436 self._match_set((TokenType.ROW, TokenType.ROWS)) 2437 self._match(TokenType.ONLY) 2438 return self.expression(exp.Fetch, direction=direction, count=count) 2439 2440 return this 2441 2442 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2443 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2444 return this 2445 2446 count = self._parse_number() 2447 self._match_set((TokenType.ROW, TokenType.ROWS)) 2448 return self.expression(exp.Offset, this=this, expression=count) 2449 2450 def _parse_lock(self) -> t.Optional[exp.Expression]: 2451 if self._match_text_seq("FOR", "UPDATE"): 2452 return self.expression(exp.Lock, update=True) 2453 if self._match_text_seq("FOR", "SHARE"): 2454 return self.expression(exp.Lock, update=False) 2455 2456 return None 2457 2458 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2459 if not self._match_set(self.SET_OPERATIONS): 2460 return this 2461 2462 token_type = self._prev.token_type 2463 2464 if token_type == TokenType.UNION: 2465 expression = exp.Union 2466 
elif token_type == TokenType.EXCEPT: 2467 expression = exp.Except 2468 else: 2469 expression = exp.Intersect 2470 2471 return self.expression( 2472 expression, 2473 this=this, 2474 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2475 expression=self._parse_set_operations(self._parse_select(nested=True)), 2476 ) 2477 2478 def _parse_expression(self) -> t.Optional[exp.Expression]: 2479 return self._parse_alias(self._parse_conjunction()) 2480 2481 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2482 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2483 2484 def _parse_equality(self) -> t.Optional[exp.Expression]: 2485 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2486 2487 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2488 return self._parse_tokens(self._parse_range, self.COMPARISON) 2489 2490 def _parse_range(self) -> t.Optional[exp.Expression]: 2491 this = self._parse_bitwise() 2492 negate = self._match(TokenType.NOT) 2493 2494 if self._match_set(self.RANGE_PARSERS): 2495 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2496 elif self._match(TokenType.ISNULL): 2497 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2498 2499 # Postgres supports ISNULL and NOTNULL for conditions. 
2500 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2501 if self._match(TokenType.NOTNULL): 2502 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2503 this = self.expression(exp.Not, this=this) 2504 2505 if negate: 2506 this = self.expression(exp.Not, this=this) 2507 2508 if self._match(TokenType.IS): 2509 this = self._parse_is(this) 2510 2511 return this 2512 2513 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2514 negate = self._match(TokenType.NOT) 2515 if self._match(TokenType.DISTINCT_FROM): 2516 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2517 return self.expression(klass, this=this, expression=self._parse_expression()) 2518 2519 this = self.expression( 2520 exp.Is, 2521 this=this, 2522 expression=self._parse_null() or self._parse_boolean(), 2523 ) 2524 return self.expression(exp.Not, this=this) if negate else this 2525 2526 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2527 unnest = self._parse_unnest() 2528 if unnest: 2529 this = self.expression(exp.In, this=this, unnest=unnest) 2530 elif self._match(TokenType.L_PAREN): 2531 expressions = self._parse_csv(self._parse_select_or_expression) 2532 2533 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2534 this = self.expression(exp.In, this=this, query=expressions[0]) 2535 else: 2536 this = self.expression(exp.In, this=this, expressions=expressions) 2537 2538 self._match_r_paren() 2539 else: 2540 this = self.expression(exp.In, this=this, field=self._parse_field()) 2541 2542 return this 2543 2544 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2545 low = self._parse_bitwise() 2546 self._match(TokenType.AND) 2547 high = self._parse_bitwise() 2548 return self.expression(exp.Between, this=this, low=low, high=high) 2549 2550 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2551 if not self._match(TokenType.ESCAPE): 2552 return this 2553 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2554 2555 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2556 this = self._parse_term() 2557 2558 while True: 2559 if self._match_set(self.BITWISE): 2560 this = self.expression( 2561 self.BITWISE[self._prev.token_type], 2562 this=this, 2563 expression=self._parse_term(), 2564 ) 2565 elif self._match_pair(TokenType.LT, TokenType.LT): 2566 this = self.expression( 2567 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2568 ) 2569 elif self._match_pair(TokenType.GT, TokenType.GT): 2570 this = self.expression( 2571 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2572 ) 2573 else: 2574 break 2575 2576 return this 2577 2578 def _parse_term(self) -> t.Optional[exp.Expression]: 2579 return self._parse_tokens(self._parse_factor, self.TERM) 2580 2581 def _parse_factor(self) -> t.Optional[exp.Expression]: 2582 return self._parse_tokens(self._parse_unary, self.FACTOR) 2583 2584 def _parse_unary(self) -> t.Optional[exp.Expression]: 2585 if self._match_set(self.UNARY_PARSERS): 2586 return self.UNARY_PARSERS[self._prev.token_type](self) 2587 return self._parse_at_time_zone(self._parse_type()) 2588 2589 def _parse_type(self) -> t.Optional[exp.Expression]: 2590 if self._match(TokenType.INTERVAL): 2591 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field()) 2592 2593 index = self._index 2594 type_token = self._parse_types(check_func=True) 2595 this = self._parse_column() 2596 2597 if type_token: 2598 if isinstance(this, exp.Literal): 2599 return self.expression(exp.Cast, this=this, to=type_token) 2600 if not type_token.args.get("expressions"): 2601 self._retreat(index) 2602 return self._parse_column() 2603 return type_token 2604 2605 return this 2606 2607 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2608 index = self._index 2609 2610 prefix = self._match_text_seq("SYSUDTLIB", ".") 2611 2612 if not 
self._match_set(self.TYPE_TOKENS): 2613 return None 2614 2615 type_token = self._prev.token_type 2616 2617 if type_token == TokenType.PSEUDO_TYPE: 2618 return self.expression(exp.PseudoType, this=self._prev.text) 2619 2620 nested = type_token in self.NESTED_TYPE_TOKENS 2621 is_struct = type_token == TokenType.STRUCT 2622 expressions = None 2623 maybe_func = False 2624 2625 if self._match(TokenType.L_PAREN): 2626 if is_struct: 2627 expressions = self._parse_csv(self._parse_struct_kwargs) 2628 elif nested: 2629 expressions = self._parse_csv(self._parse_types) 2630 else: 2631 expressions = self._parse_csv(self._parse_conjunction) 2632 2633 if not expressions: 2634 self._retreat(index) 2635 return None 2636 2637 self._match_r_paren() 2638 maybe_func = True 2639 2640 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2641 this = exp.DataType( 2642 this=exp.DataType.Type.ARRAY, 2643 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2644 nested=True, 2645 ) 2646 2647 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2648 this = exp.DataType( 2649 this=exp.DataType.Type.ARRAY, 2650 expressions=[this], 2651 nested=True, 2652 ) 2653 2654 return this 2655 2656 if self._match(TokenType.L_BRACKET): 2657 self._retreat(index) 2658 return None 2659 2660 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2661 if nested and self._match(TokenType.LT): 2662 if is_struct: 2663 expressions = self._parse_csv(self._parse_struct_kwargs) 2664 else: 2665 expressions = self._parse_csv(self._parse_types) 2666 2667 if not self._match(TokenType.GT): 2668 self.raise_error("Expecting >") 2669 2670 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2671 values = self._parse_csv(self._parse_conjunction) 2672 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2673 2674 value: t.Optional[exp.Expression] = None 2675 if type_token in self.TIMESTAMPS: 2676 if self._match(TokenType.WITH_TIME_ZONE) or type_token == 
            # NOTE(review): the lines below continue _parse_types(), whose beginning
            # (and the `if` condition that `TokenType.TIMESTAMPTZ:` completes) sits
            # above this excerpt; reproduced as-is rather than guessed at.
            TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                # WITHOUT TIME ZONE maps to the plain (zone-less) TIME / TIMESTAMP types.
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # The type keyword may actually be a function call (e.g. DATE(...)).
            # Peek for a string literal; if there is none, backtrack fully and let
            # the caller try to parse a function instead.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        # Fall back to a DataType derived directly from the token's name.
        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: either a bare type or `name [:] type` as a StructKwarg."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)  # the colon separator is optional
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone when an AT TIME ZONE clause follows; otherwise pass through."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed / casted) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take a literal on the right-hand side.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the accumulated parts one level: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style decimal literal
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            self._match_r_paren()

            if this and comments:
                # Preserve comments attached to the opening parenthesis.
                this.comments = comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, using `functions` (default: self.FUNCTIONS) for
        known names and falling back to exp.Anonymous for unknown ones."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only parenless builtins (e.g. CURRENT_DATE) qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening parenthesis

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                # NOTE(review): continuation of _parse_function(); the `if function:`
                # branch this belongs to starts above this excerpt.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function/UDF definition: a name plus optional type."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: a possibly dotted name, optionally followed by a
        parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'x'); falls back to an Identifier
        when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap an N'...' national-string token in an exp.National node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), or fall back to DISTINCT /
        select-or-expression parsing with IGNORE|RESPECT NULLS handling."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular expression instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In `name = value` argument syntax the left side is a parameter
                # name, not a column reference.
                left.replace(exp.Var(this=left.text("this")))

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)  # RESPECT NULLS is the default; just consume it

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema; backs
        off (returning `this` unchanged) when the parenthesis opens a SELECT."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name; returns
        `this` unchanged when neither is present."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a
        single expression argument."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its optional
        sequence options (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE)."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        # NOTE(review): tail of _parse_generated_as_identity(), whose body starts
        # above this excerpt.
        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the remainder of a NOT ... constraint (NOT NULL / NOT CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; named ones gather all following
        unnamed constraints / function calls into one exp.Constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the registered parser for the next constraint keyword, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, with or without a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # the event, e.g. DELETE/UPDATE

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES <table> [(cols)] [options] clause, if present."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint body, including ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE / RESTRICT) is the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint (no parens) or a table
        constraint with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts / array literals and `{...}` struct literals
        following `this`; recurses so chained brackets all apply."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # NOTE(review): the apply_index_offset(...) call is completed on the
            # next line of the file, past the end of this hunk.
            expressions = apply_index_offset(expressions,
                # NOTE(review): completes the apply_index_offset(...) call opened on
                # the previous line of the file (dialect-dependent array base index).
                -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the `lo : hi` slice tail inside a bracket, if a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END into exp.Case."""
        ifs = []
        default = None

        expression = self._parse_conjunction()  # optional CASE operand

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, true[, false]) and
        IF cond THEN true [ELSE false] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast vs exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG and its dialect variants into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # NOTE(review): tail of _parse_string_agg(); the argument parsing happens
        # above this excerpt.
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset | expr, type) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may itself evaluate to NULL, so
                # match either by equality or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                    ),
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one [KEY] k [:|VALUE] v pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL handling and key-uniqueness clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        # NOTE(review): tail of _parse_json_object(); the key/value and NULL-handling
        # parsing happens above this excerpt.
        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...), honoring the dialect's argument order and whether a
        single-argument LOG means natural log."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MATCH(cols) AGAINST(str [modifier]) into exp.MatchAgainst."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips the first two
        comma-separated arguments for dialects that pass (haystack, needle)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()  # LEADING / TRAILING / BOTH

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first term was the removal set.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE|RESPECT NULLS / OVER (...) suffixes
        around `this`; with `alias=True`, parse a named-window definition instead."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        # NOTE(review): continuation of _parse_window(); FILTER / WITHIN GROUP and
        # IGNORE|RESPECT NULLS handling happen above this excerpt.
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name form (no parenthesized spec).
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (UNBOUNDED/CURRENT ROW/expr + PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or parenthesized alias list) after `this`;
        with `explicit=True` an AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; non-reserved keywords are accepted when
        `any_token` is true, and `prefix_tokens` glue onto the front of the name."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or fall back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and treat it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, if present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. ${name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind the trigger token if the
        chosen parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT-star EXCEPT column list, wrapped or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    # NOTE(review): _parse_replace continues past the end of this excerpt; only
    # the guard shown here is visible, so it is reproduced as-is.
    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
3762 if self._match(TokenType.L_PAREN, advance=False): 3763 return self._parse_wrapped_csv(self._parse_expression) 3764 return self._parse_csv(self._parse_expression) 3765 3766 def _parse_csv( 3767 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3768 ) -> t.List[t.Optional[exp.Expression]]: 3769 parse_result = parse_method() 3770 items = [parse_result] if parse_result is not None else [] 3771 3772 while self._match(sep): 3773 if parse_result and self._prev_comments: 3774 parse_result.comments = self._prev_comments 3775 3776 parse_result = parse_method() 3777 if parse_result is not None: 3778 items.append(parse_result) 3779 3780 return items 3781 3782 def _parse_tokens( 3783 self, parse_method: t.Callable, expressions: t.Dict 3784 ) -> t.Optional[exp.Expression]: 3785 this = parse_method() 3786 3787 while self._match_set(expressions): 3788 this = self.expression( 3789 expressions[self._prev.token_type], 3790 this=this, 3791 comments=self._prev_comments, 3792 expression=parse_method(), 3793 ) 3794 3795 return this 3796 3797 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3798 return self._parse_wrapped_csv(self._parse_id_var) 3799 3800 def _parse_wrapped_csv( 3801 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3802 ) -> t.List[t.Optional[exp.Expression]]: 3803 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3804 3805 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3806 self._match_l_paren() 3807 parse_result = parse_method() 3808 self._match_r_paren() 3809 return parse_result 3810 3811 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3812 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 3813 3814 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3815 return self._parse_set_operations( 3816 self._parse_select(nested=True, parse_subquery_alias=False) 3817 ) 3818 3819 def _parse_transaction(self) -> 
exp.Expression: 3820 this = None 3821 if self._match_texts(self.TRANSACTION_KIND): 3822 this = self._prev.text 3823 3824 self._match_texts({"TRANSACTION", "WORK"}) 3825 3826 modes = [] 3827 while True: 3828 mode = [] 3829 while self._match(TokenType.VAR): 3830 mode.append(self._prev.text) 3831 3832 if mode: 3833 modes.append(" ".join(mode)) 3834 if not self._match(TokenType.COMMA): 3835 break 3836 3837 return self.expression(exp.Transaction, this=this, modes=modes) 3838 3839 def _parse_commit_or_rollback(self) -> exp.Expression: 3840 chain = None 3841 savepoint = None 3842 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3843 3844 self._match_texts({"TRANSACTION", "WORK"}) 3845 3846 if self._match_text_seq("TO"): 3847 self._match_text_seq("SAVEPOINT") 3848 savepoint = self._parse_id_var() 3849 3850 if self._match(TokenType.AND): 3851 chain = not self._match_text_seq("NO") 3852 self._match_text_seq("CHAIN") 3853 3854 if is_rollback: 3855 return self.expression(exp.Rollback, savepoint=savepoint) 3856 return self.expression(exp.Commit, chain=chain) 3857 3858 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3859 if not self._match_text_seq("ADD"): 3860 return None 3861 3862 self._match(TokenType.COLUMN) 3863 exists_column = self._parse_exists(not_=True) 3864 expression = self._parse_column_def(self._parse_field(any_token=True)) 3865 3866 if expression: 3867 expression.set("exists", exists_column) 3868 3869 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 3870 if self._match_texts(("FIRST", "AFTER")): 3871 position = self._prev.text 3872 column_position = self.expression( 3873 exp.ColumnPosition, this=self._parse_column(), position=position 3874 ) 3875 expression.set("position", column_position) 3876 3877 return expression 3878 3879 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3880 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3881 3882 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3883 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3884 return self.expression( 3885 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3886 ) 3887 3888 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3889 this = None 3890 kind = self._prev.token_type 3891 3892 if kind == TokenType.CONSTRAINT: 3893 this = self._parse_id_var() 3894 3895 if self._match_text_seq("CHECK"): 3896 expression = self._parse_wrapped(self._parse_conjunction) 3897 enforced = self._match_text_seq("ENFORCED") 3898 3899 return self.expression( 3900 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3901 ) 3902 3903 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3904 expression = self._parse_foreign_key() 3905 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3906 expression = self._parse_primary_key() 3907 else: 3908 expression = None 3909 3910 return self.expression(exp.AddConstraint, this=this, expression=expression) 3911 3912 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3913 index = self._index - 1 3914 3915 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3916 return self._parse_csv(self._parse_add_constraint) 3917 3918 self._retreat(index) 3919 return self._parse_csv(self._parse_add_column) 3920 3921 def _parse_alter_table_alter(self) -> exp.Expression: 3922 self._match(TokenType.COLUMN) 3923 column = self._parse_field(any_token=True) 3924 3925 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3926 return self.expression(exp.AlterColumn, this=column, drop=True) 3927 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3928 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3929 3930 self._match_text_seq("SET", "DATA") 3931 return self.expression( 3932 exp.AlterColumn, 3933 this=column, 3934 
dtype=self._match_text_seq("TYPE") and self._parse_types(), 3935 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3936 using=self._match(TokenType.USING) and self._parse_conjunction(), 3937 ) 3938 3939 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3940 index = self._index - 1 3941 3942 partition_exists = self._parse_exists() 3943 if self._match(TokenType.PARTITION, advance=False): 3944 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3945 3946 self._retreat(index) 3947 return self._parse_csv(self._parse_drop_column) 3948 3949 def _parse_alter_table_rename(self) -> exp.Expression: 3950 self._match_text_seq("TO") 3951 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3952 3953 def _parse_alter(self) -> t.Optional[exp.Expression]: 3954 start = self._prev 3955 3956 if not self._match(TokenType.TABLE): 3957 return self._parse_as_command(start) 3958 3959 exists = self._parse_exists() 3960 this = self._parse_table(schema=True) 3961 3962 if self._next: 3963 self._advance() 3964 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 3965 3966 if parser: 3967 actions = ensure_list(parser(self)) 3968 3969 if not self._curr: 3970 return self.expression( 3971 exp.AlterTable, 3972 this=this, 3973 exists=exists, 3974 actions=actions, 3975 ) 3976 return self._parse_as_command(start) 3977 3978 def _parse_merge(self) -> exp.Expression: 3979 self._match(TokenType.INTO) 3980 target = self._parse_table() 3981 3982 self._match(TokenType.USING) 3983 using = self._parse_table() 3984 3985 self._match(TokenType.ON) 3986 on = self._parse_conjunction() 3987 3988 whens = [] 3989 while self._match(TokenType.WHEN): 3990 matched = not self._match(TokenType.NOT) 3991 self._match_text_seq("MATCHED") 3992 source = ( 3993 False 3994 if self._match_text_seq("BY", "TARGET") 3995 else self._match_text_seq("BY", "SOURCE") 3996 ) 3997 condition = self._parse_conjunction() if 
self._match(TokenType.AND) else None 3998 3999 self._match(TokenType.THEN) 4000 4001 if self._match(TokenType.INSERT): 4002 _this = self._parse_star() 4003 if _this: 4004 then = self.expression(exp.Insert, this=_this) 4005 else: 4006 then = self.expression( 4007 exp.Insert, 4008 this=self._parse_value(), 4009 expression=self._match(TokenType.VALUES) and self._parse_value(), 4010 ) 4011 elif self._match(TokenType.UPDATE): 4012 expressions = self._parse_star() 4013 if expressions: 4014 then = self.expression(exp.Update, expressions=expressions) 4015 else: 4016 then = self.expression( 4017 exp.Update, 4018 expressions=self._match(TokenType.SET) 4019 and self._parse_csv(self._parse_equality), 4020 ) 4021 elif self._match(TokenType.DELETE): 4022 then = self.expression(exp.Var, this=self._prev.text) 4023 else: 4024 then = None 4025 4026 whens.append( 4027 self.expression( 4028 exp.When, 4029 matched=matched, 4030 source=source, 4031 condition=condition, 4032 then=then, 4033 ) 4034 ) 4035 4036 return self.expression( 4037 exp.Merge, 4038 this=target, 4039 using=using, 4040 on=on, 4041 expressions=whens, 4042 ) 4043 4044 def _parse_show(self) -> t.Optional[exp.Expression]: 4045 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4046 if parser: 4047 return parser(self) 4048 self._advance() 4049 return self.expression(exp.Show, this=self._prev.text.upper()) 4050 4051 def _parse_set_item_assignment( 4052 self, kind: t.Optional[str] = None 4053 ) -> t.Optional[exp.Expression]: 4054 index = self._index 4055 4056 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4057 return self._parse_set_transaction(global_=kind == "GLOBAL") 4058 4059 left = self._parse_primary() or self._parse_id_var() 4060 4061 if not self._match_texts(("=", "TO")): 4062 self._retreat(index) 4063 return None 4064 4065 right = self._parse_statement() or self._parse_id_var() 4066 this = self.expression( 4067 exp.EQ, 4068 this=left, 4069 expression=right, 4070 
) 4071 4072 return self.expression( 4073 exp.SetItem, 4074 this=this, 4075 kind=kind, 4076 ) 4077 4078 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4079 self._match_text_seq("TRANSACTION") 4080 characteristics = self._parse_csv( 4081 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4082 ) 4083 return self.expression( 4084 exp.SetItem, 4085 expressions=characteristics, 4086 kind="TRANSACTION", 4087 **{"global": global_}, # type: ignore 4088 ) 4089 4090 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4091 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4092 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4093 4094 def _parse_set(self) -> exp.Expression: 4095 index = self._index 4096 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4097 4098 if self._curr: 4099 self._retreat(index) 4100 return self._parse_as_command(self._prev) 4101 4102 return set_ 4103 4104 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4105 for option in options: 4106 if self._match_text_seq(*option.split(" ")): 4107 return exp.Var(this=option) 4108 return None 4109 4110 def _parse_as_command(self, start: Token) -> exp.Command: 4111 while self._curr: 4112 self._advance() 4113 text = self._find_sql(start, self._prev) 4114 size = len(start.text) 4115 return exp.Command(this=text[:size], expression=text[size:]) 4116 4117 def _find_parser( 4118 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4119 ) -> t.Optional[t.Callable]: 4120 if not self._curr: 4121 return None 4122 4123 index = self._index 4124 this = [] 4125 while True: 4126 # The current token might be multiple words 4127 curr = self._curr.text.upper() 4128 key = curr.split(" ") 4129 this.append(curr) 4130 self._advance() 4131 result, trie = in_trie(trie, key) 4132 if result == 0: 4133 break 4134 if result == 2: 4135 subparser = parsers[" 
".join(this)] 4136 return subparser 4137 self._retreat(index) 4138 return None 4139 4140 def _match(self, token_type, advance=True): 4141 if not self._curr: 4142 return None 4143 4144 if self._curr.token_type == token_type: 4145 if advance: 4146 self._advance() 4147 return True 4148 4149 return None 4150 4151 def _match_set(self, types, advance=True): 4152 if not self._curr: 4153 return None 4154 4155 if self._curr.token_type in types: 4156 if advance: 4157 self._advance() 4158 return True 4159 4160 return None 4161 4162 def _match_pair(self, token_type_a, token_type_b, advance=True): 4163 if not self._curr or not self._next: 4164 return None 4165 4166 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4167 if advance: 4168 self._advance(2) 4169 return True 4170 4171 return None 4172 4173 def _match_l_paren(self, expression=None): 4174 if not self._match(TokenType.L_PAREN): 4175 self.raise_error("Expecting (") 4176 if expression and self._prev_comments: 4177 expression.comments = self._prev_comments 4178 4179 def _match_r_paren(self, expression=None): 4180 if not self._match(TokenType.R_PAREN): 4181 self.raise_error("Expecting )") 4182 if expression and self._prev_comments: 4183 expression.comments = self._prev_comments 4184 4185 def _match_texts(self, texts, advance=True): 4186 if self._curr and self._curr.text.upper() in texts: 4187 if advance: 4188 self._advance() 4189 return True 4190 return False 4191 4192 def _match_text_seq(self, *texts, advance=True): 4193 index = self._index 4194 for text in texts: 4195 if self._curr and self._curr.text.upper() == text: 4196 self._advance() 4197 else: 4198 self._retreat(index) 4199 return False 4200 4201 if not advance: 4202 self._retreat(index) 4203 4204 return True 4205 4206 def _replace_columns_with_dots(self, this): 4207 if isinstance(this, exp.Dot): 4208 exp.replace_children(this, self._replace_columns_with_dots) 4209 elif isinstance(this, exp.Column): 4210 exp.replace_children(this, 
self._replace_columns_with_dots) 4211 table = this.args.get("table") 4212 this = ( 4213 self.expression(exp.Dot, this=table, expression=this.this) 4214 if table 4215 else self.expression(exp.Var, this=this.name) 4216 ) 4217 elif isinstance(this, exp.Identifier): 4218 this = self.expression(exp.Var, this=this.name) 4219 return this 4220 4221 def _replace_lambda(self, node, lambda_variables): 4222 for column in node.find_all(exp.Column): 4223 if column.parts[0].name in lambda_variables: 4224 dot_or_id = column.to_dot() if column.table else column.this 4225 parent = column.parent 4226 4227 while isinstance(parent, exp.Dot): 4228 if not isinstance(parent.parent, exp.Dot): 4229 parent.replace(dot_or_id) 4230 break 4231 parent = parent.parent 4232 else: 4233 column.replace(dot_or_id) 4234 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
763 def __init__( 764 self, 765 error_level: t.Optional[ErrorLevel] = None, 766 error_message_context: int = 100, 767 index_offset: int = 0, 768 unnest_column_only: bool = False, 769 alias_post_tablesample: bool = False, 770 max_errors: int = 3, 771 null_ordering: t.Optional[str] = None, 772 ): 773 self.error_level = error_level or ErrorLevel.IMMEDIATE 774 self.error_message_context = error_message_context 775 self.index_offset = index_offset 776 self.unnest_column_only = unnest_column_only 777 self.alias_post_tablesample = alias_post_tablesample 778 self.max_errors = max_errors 779 self.null_ordering = null_ordering 780 self.reset()
792 def parse( 793 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 794 ) -> t.List[t.Optional[exp.Expression]]: 795 """ 796 Parses a list of tokens and returns a list of syntax trees, one tree 797 per parsed SQL statement. 798 799 Args: 800 raw_tokens: the list of tokens. 801 sql: the original SQL string, used to produce helpful debug messages. 802 803 Returns: 804 The list of syntax trees. 805 """ 806 return self._parse( 807 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 808 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
810 def parse_into( 811 self, 812 expression_types: exp.IntoType, 813 raw_tokens: t.List[Token], 814 sql: t.Optional[str] = None, 815 ) -> t.List[t.Optional[exp.Expression]]: 816 """ 817 Parses a list of tokens into a given Expression type. If a collection of Expression 818 types is given instead, this method will try to parse the token list into each one 819 of them, stopping at the first for which the parsing succeeds. 820 821 Args: 822 expression_types: the expression type(s) to try and parse the token list into. 823 raw_tokens: the list of tokens. 824 sql: the original SQL string, used to produce helpful debug messages. 825 826 Returns: 827 The target Expression. 828 """ 829 errors = [] 830 for expression_type in ensure_collection(expression_types): 831 parser = self.EXPRESSION_PARSERS.get(expression_type) 832 if not parser: 833 raise TypeError(f"No parser registered for {expression_type}") 834 try: 835 return self._parse(parser, raw_tokens, sql) 836 except ParseError as e: 837 e.errors[0]["into_expression"] = expression_type 838 errors.append(e) 839 raise ParseError( 840 f"Failed to parse into {expression_types}", 841 errors=merge_errors(errors), 842 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
878 def check_errors(self) -> None: 879 """ 880 Logs or raises any found errors, depending on the chosen error level setting. 881 """ 882 if self.error_level == ErrorLevel.WARN: 883 for error in self.errors: 884 logger.error(str(error)) 885 elif self.error_level == ErrorLevel.RAISE and self.errors: 886 raise ParseError( 887 concat_messages(self.errors, self.max_errors), 888 errors=merge_errors(self.errors), 889 )
Logs or raises any found errors, depending on the chosen error level setting.
891 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 892 """ 893 Appends an error in the list of recorded errors or raises it, depending on the chosen 894 error level setting. 895 """ 896 token = token or self._curr or self._prev or Token.string("") 897 start = self._find_token(token) 898 end = start + len(token.text) 899 start_context = self.sql[max(start - self.error_message_context, 0) : start] 900 highlight = self.sql[start:end] 901 end_context = self.sql[end : end + self.error_message_context] 902 903 error = ParseError.new( 904 f"{message}. Line {token.line}, Col: {token.col}.\n" 905 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 906 description=message, 907 line=token.line, 908 col=token.col, 909 start_context=start_context, 910 highlight=highlight, 911 end_context=end_context, 912 ) 913 914 if self.error_level == ErrorLevel.IMMEDIATE: 915 raise error 916 917 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
919 def expression( 920 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 921 ) -> exp.Expression: 922 """ 923 Creates a new, validated Expression. 924 925 Args: 926 exp_class: the expression class to instantiate. 927 comments: an optional list of comments to attach to the expression. 928 kwargs: the arguments to set for the expression along with their respective values. 929 930 Returns: 931 The target expression. 932 """ 933 instance = exp_class(**kwargs) 934 if self._prev_comments: 935 instance.comments = self._prev_comments 936 self._prev_comments = None 937 if comments: 938 instance.comments = comments 939 self.validate_expression(instance) 940 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
942 def validate_expression( 943 self, expression: exp.Expression, args: t.Optional[t.List] = None 944 ) -> None: 945 """ 946 Validates an already instantiated expression, making sure that all its mandatory arguments 947 are set. 948 949 Args: 950 expression: the expression to validate. 951 args: an optional list of items that was used to instantiate the expression, if it's a Func. 952 """ 953 if self.error_level == ErrorLevel.IGNORE: 954 return 955 956 for error_message in expression.error_messages(args): 957 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.