# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import (
    apply_index_offset,
    count_params,
    ensure_collection,
    ensure_list,
    seq_get,
)
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")


def parse_var_map(args):
    # Pairs up the flat argument list (k1, v1, k2, v2, ...) into parallel
    # key/value arrays for a VAR_MAP expression.
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


class _Parser(type):
    # Metaclass: precomputes word-tries for the SHOW/SET command parsers so
    # multi-word keys (e.g. "CHARACTER SET") can be matched token by token.
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Function-name -> builder. Starts from every registered Func's sql_names
    # and layers dialect-agnostic rewrites (e.g. IFNULL -> Coalesce) on top.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }

    # Functions that may appear without parentheses (e.g. CURRENT_DATE).
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Types that can wrap other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery, e.g. x = ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Tokens that may also be used as identifiers (non-reserved keywords).
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens legal as table aliases; excludes tokens that would be
    # ambiguous after a table expression (e.g. LEFT could start a join).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator precedence tables (token -> expression class).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda-like constructs: x -> expr produces exp.Lambda, x => expr a kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators applied to a column: casts and JSON(B) extraction.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: target Expression type -> parser method.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self:
            self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Top-level statement dispatch: leading keyword token -> parser method.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self:
            self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal parsers; each receives the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self:
            self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/membership predicates that bind a left-hand expression.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # DDL property keywords (matched by text, not token type).
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda
            self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        # Teradata-style properties: the parser methods inspect _prev for the
        # NO/DUAL/DEFAULT modifier that was consumed before dispatch.
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
    }

    # Column-constraint keywords inside CREATE TABLE column definitions.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint,
            this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (non-CSV grammar).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # SELECT-modifier clauses, keyed by the arg name on the Select expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self:
            self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Populated by dialects; the metaclass builds tries over these keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # If True, CAST failures are hard errors; dialects may relax to TRY_CAST.
    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        # Clears all per-parse state so the instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons, then runs parse_method once
        # per statement chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the raw SQL spanning from the start token through the end token.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Converts a token's (line, col) position into an absolute index in self.sql.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index.
        self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback for statements parsed as opaque commands (keyword + raw text).
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                # Unknown DROP target: fall back to an opaque command.
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; True only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        set_ = self._match(TokenType.SET)  # Teradata
        multiset = self._match_text_seq("MULTISET")  # Teradata
        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
        volatile = self._match(TokenType.VOLATILE)  # Teradata
        temporary = self._match(TokenType.TEMPORARY)
        transient = self._match_text_seq("TRANSIENT")
        external = self._match_text_seq("EXTERNAL")
        unique = self._match(TokenType.UNIQUE)
        materialized = self._match(TokenType.MATERIALIZED)

        # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        data = None
        statistics = None
        no_primary_index = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.append(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.append(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.append(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                if self._match_text_seq("WITH", "DATA"):
                    data = True
                elif self._match_text_seq("WITH", "NO", "DATA"):
                    data = False

                if self._match_text_seq("AND", "STATISTICS"):
                    statistics = True
                elif self._match_text_seq("AND", "NO", "STATISTICS"):
                    statistics = False

                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.append(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            set=set_,
            multiset=multiset,
            global_temporary=global_temporary,
            volatile=volatile,
            exists=exists,
            properties=properties,
            temporary=temporary,
            transient=transient,
            external=external,
            replace=replace,
            unique=unique,
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property; returns None when nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property, where key is an identifier or a string.
        # advance=False: just peek, so `key` can be re-parsed as a var/string below.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parse `[=|AS] <value>` and wrap the value in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Parse zero or more properties into an exp.Properties node.

        Args:
            before: when truthy, use the pre-table-name (Teradata) property grammar.

        Returns:
            exp.Properties, or None when no property was found.
        """
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield one property or a collection of them.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        """Parse Teradata [NO] FALLBACK [PROTECTION]."""
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property following WITH: a parenthesized csv of properties,
        WITH JOURNAL TABLE, or WITH ... ISOLATED LOADING."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse DEFINER = user@host; both parts are required."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # host may also be the literal `%` (MOD token)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse Teradata WITH JOURNAL TABLE = <table>."""
        self._match_text_seq("WITH", "JOURNAL", "TABLE")
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        """Parse Teradata [NO] LOG."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Parse Teradata [NO|DUAL] [BEFORE] JOURNAL (NO/DUAL were consumed by the caller)."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parse Teradata [NOT] [LOCAL] AFTER JOURNAL."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parse Teradata CHECKSUM = ON|OFF|DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None  # None when neither ON nor OFF was given
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Parse Teradata FREESPACE = <number> [PERCENT]."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parse Teradata [NO|DEFAULT] MERGEBLOCKRATIO [= n [PERCENT]]."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse Teradata [DEFAULT|MIN|MAX] DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse Teradata BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT|AUTOTEMP(...)."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
self._match_text_seq("AUTOTEMP"): 1306 autotemp = self._parse_schema() 1307 1308 return self.expression( 1309 exp.BlockCompressionProperty, 1310 always=always, 1311 manual=manual, 1312 never=never, 1313 default=default, 1314 autotemp=autotemp, 1315 ) 1316 1317 def _parse_withisolatedloading(self) -> exp.Expression: 1318 self._match(TokenType.WITH) 1319 no = self._match_text_seq("NO") 1320 concurrent = self._match_text_seq("CONCURRENT") 1321 self._match_text_seq("ISOLATED", "LOADING") 1322 for_all = self._match_text_seq("FOR", "ALL") 1323 for_insert = self._match_text_seq("FOR", "INSERT") 1324 for_none = self._match_text_seq("FOR", "NONE") 1325 return self.expression( 1326 exp.IsolatedLoadingProperty, 1327 no=no, 1328 concurrent=concurrent, 1329 for_all=for_all, 1330 for_insert=for_insert, 1331 for_none=for_none, 1332 ) 1333 1334 def _parse_locking(self) -> exp.Expression: 1335 if self._match(TokenType.TABLE): 1336 kind = "TABLE" 1337 elif self._match(TokenType.VIEW): 1338 kind = "VIEW" 1339 elif self._match(TokenType.ROW): 1340 kind = "ROW" 1341 elif self._match_text_seq("DATABASE"): 1342 kind = "DATABASE" 1343 else: 1344 kind = None 1345 1346 if kind in ("DATABASE", "TABLE", "VIEW"): 1347 this = self._parse_table_parts() 1348 else: 1349 this = None 1350 1351 if self._match(TokenType.FOR): 1352 for_or_in = "FOR" 1353 elif self._match(TokenType.IN): 1354 for_or_in = "IN" 1355 else: 1356 for_or_in = None 1357 1358 if self._match_text_seq("ACCESS"): 1359 lock_type = "ACCESS" 1360 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1361 lock_type = "EXCLUSIVE" 1362 elif self._match_text_seq("SHARE"): 1363 lock_type = "SHARE" 1364 elif self._match_text_seq("READ"): 1365 lock_type = "READ" 1366 elif self._match_text_seq("WRITE"): 1367 lock_type = "WRITE" 1368 elif self._match_text_seq("CHECKSUM"): 1369 lock_type = "CHECKSUM" 1370 else: 1371 lock_type = None 1372 1373 override = self._match_text_seq("OVERRIDE") 1374 1375 return self.expression( 1376 exp.LockingProperty, 1377 
this=this, 1378 kind=kind, 1379 for_or_in=for_or_in, 1380 lock_type=lock_type, 1381 override=override, 1382 ) 1383 1384 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1385 if self._match(TokenType.PARTITION_BY): 1386 return self._parse_csv(self._parse_conjunction) 1387 return [] 1388 1389 def _parse_partitioned_by(self) -> exp.Expression: 1390 self._match(TokenType.EQ) 1391 return self.expression( 1392 exp.PartitionedByProperty, 1393 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1394 ) 1395 1396 def _parse_distkey(self) -> exp.Expression: 1397 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1398 1399 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1400 table = self._parse_table(schema=True) 1401 options = [] 1402 while self._match_texts(("INCLUDING", "EXCLUDING")): 1403 this = self._prev.text.upper() 1404 id_var = self._parse_id_var() 1405 1406 if not id_var: 1407 return None 1408 1409 options.append( 1410 self.expression( 1411 exp.Property, 1412 this=this, 1413 value=exp.Var(this=id_var.this.upper()), 1414 ) 1415 ) 1416 return self.expression(exp.LikeProperty, this=table, expressions=options) 1417 1418 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1419 return self.expression( 1420 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1421 ) 1422 1423 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1424 self._match(TokenType.EQ) 1425 return self.expression( 1426 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1427 ) 1428 1429 def _parse_returns(self) -> exp.Expression: 1430 value: t.Optional[exp.Expression] 1431 is_table = self._match(TokenType.TABLE) 1432 1433 if is_table: 1434 if self._match(TokenType.LT): 1435 value = self.expression( 1436 exp.Schema, 1437 this="TABLE", 1438 expressions=self._parse_csv(self._parse_struct_kwargs), 1439 ) 1440 if not 
self._match(TokenType.GT): 1441 self.raise_error("Expecting >") 1442 else: 1443 value = self._parse_schema(exp.Var(this="TABLE")) 1444 else: 1445 value = self._parse_types() 1446 1447 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1448 1449 def _parse_describe(self) -> exp.Expression: 1450 kind = self._match_set(self.CREATABLES) and self._prev.text 1451 this = self._parse_table() 1452 1453 return self.expression(exp.Describe, this=this, kind=kind) 1454 1455 def _parse_insert(self) -> exp.Expression: 1456 overwrite = self._match(TokenType.OVERWRITE) 1457 local = self._match(TokenType.LOCAL) 1458 1459 this: t.Optional[exp.Expression] 1460 1461 alternative = None 1462 if self._match_text_seq("DIRECTORY"): 1463 this = self.expression( 1464 exp.Directory, 1465 this=self._parse_var_or_string(), 1466 local=local, 1467 row_format=self._parse_row_format(match_row=True), 1468 ) 1469 else: 1470 if self._match(TokenType.OR): 1471 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1472 1473 self._match(TokenType.INTO) 1474 self._match(TokenType.TABLE) 1475 this = self._parse_table(schema=True) 1476 1477 return self.expression( 1478 exp.Insert, 1479 this=this, 1480 exists=self._parse_exists(), 1481 partition=self._parse_partition(), 1482 expression=self._parse_ddl_select(), 1483 overwrite=overwrite, 1484 alternative=alternative, 1485 ) 1486 1487 def _parse_row(self) -> t.Optional[exp.Expression]: 1488 if not self._match(TokenType.FORMAT): 1489 return None 1490 return self._parse_row_format() 1491 1492 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1493 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1494 return None 1495 1496 if self._match_text_seq("SERDE"): 1497 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1498 1499 self._match_text_seq("DELIMITED") 1500 1501 kwargs = {} 1502 1503 if self._match_text_seq("FIELDS", "TERMINATED", 
"BY"): 1504 kwargs["fields"] = self._parse_string() 1505 if self._match_text_seq("ESCAPED", "BY"): 1506 kwargs["escaped"] = self._parse_string() 1507 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1508 kwargs["collection_items"] = self._parse_string() 1509 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1510 kwargs["map_keys"] = self._parse_string() 1511 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1512 kwargs["lines"] = self._parse_string() 1513 if self._match_text_seq("NULL", "DEFINED", "AS"): 1514 kwargs["null"] = self._parse_string() 1515 1516 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1517 1518 def _parse_load_data(self) -> exp.Expression: 1519 local = self._match(TokenType.LOCAL) 1520 self._match_text_seq("INPATH") 1521 inpath = self._parse_string() 1522 overwrite = self._match(TokenType.OVERWRITE) 1523 self._match_pair(TokenType.INTO, TokenType.TABLE) 1524 1525 return self.expression( 1526 exp.LoadData, 1527 this=self._parse_table(schema=True), 1528 local=local, 1529 overwrite=overwrite, 1530 inpath=inpath, 1531 partition=self._parse_partition(), 1532 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1533 serde=self._match_text_seq("SERDE") and self._parse_string(), 1534 ) 1535 1536 def _parse_delete(self) -> exp.Expression: 1537 self._match(TokenType.FROM) 1538 1539 return self.expression( 1540 exp.Delete, 1541 this=self._parse_table(schema=True), 1542 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1543 where=self._parse_where(), 1544 ) 1545 1546 def _parse_update(self) -> exp.Expression: 1547 return self.expression( 1548 exp.Update, 1549 **{ # type: ignore 1550 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1551 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1552 "from": self._parse_from(), 1553 "where": self._parse_where(), 1554 }, 1555 ) 1556 1557 def 
    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse PARTITION (<csv of expressions>); None when absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row as a Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized query, attaching any leading CTE.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: whether a parenthesized subquery may take an alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause with one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH is also tolerated
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """Parse a single CTE: <alias> [AS] (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> [(col, ...)]; None when neither alias nor columns found."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # if the parenthesis held no columns it wasn't a column list: back up
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in a Subquery, parsing trailing pivots and (optionally) an alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach laterals, joins, comma-joined tables, and registered query
        modifiers (WHERE, GROUP BY, ...) to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # comma join: extend the FROM clause with another table
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse an optimizer hint comment /*+ ... */ into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        """Parse a FROM clause with one or more comma-separated tables."""
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )
    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parse a MATCH_RECOGNIZE(...) clause: PARTITION BY, ORDER BY, MEASURES,
        rows-per-match, AFTER MATCH SKIP, PATTERN, and DEFINE sub-clauses."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; capture its raw SQL text
            # verbatim by tracking balanced parentheses rather than tokenizing it.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] ... or OUTER/CROSS APPLY ...; APPLY forms are
        returned wrapped in an exp.Join."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # not a subquery: a (possibly dotted) function call or identifier
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW f(x) tableAlias AS col1, col2 (Hive)
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional NATURAL / side (LEFT, RIGHT, ...) / kind (INNER,
        OUTER, ...) tokens preceding JOIN; returns the matched tokens or None."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause with optional NATURAL/side/kind and ON/USING condition."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse the tail of CREATE INDEX: <name> ON [TABLE] <table> <columns>."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index in CREATE TABLE: [UNIQUE] [PRIMARY] [AMP] INDEX
        <name> [(cols)] (Teradata); None when INDEX is absent."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a dotted table name ([catalog.][db.]table[.more...]) into exp.Table."""
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, UNNEST, VALUES, subquery, or a
        named table with optional alias, hints, and TABLESAMPLE.

        Args:
            schema: parse the table as a schema target (e.g. in DDL).
            alias_tokens: token set permitted as the table alias.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # dialect flag: some dialects put the alias after TABLESAMPLE
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
self._parse_wrapped_csv(self._parse_column) 2062 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2063 alias = self._parse_table_alias() 2064 2065 if alias and self.unnest_column_only: 2066 if alias.args.get("columns"): 2067 self.raise_error("Unexpected extra column alias in unnest.") 2068 alias.set("columns", [alias.this]) 2069 alias.set("this", None) 2070 2071 offset = None 2072 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2073 self._match(TokenType.ALIAS) 2074 offset = self._parse_conjunction() 2075 2076 return self.expression( 2077 exp.Unnest, 2078 expressions=expressions, 2079 ordinality=ordinality, 2080 alias=alias, 2081 offset=offset, 2082 ) 2083 2084 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2085 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2086 if not is_derived and not self._match(TokenType.VALUES): 2087 return None 2088 2089 expressions = self._parse_csv(self._parse_value) 2090 2091 if is_derived: 2092 self._match_r_paren() 2093 2094 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2095 2096 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2097 if not self._match(TokenType.TABLE_SAMPLE): 2098 return None 2099 2100 method = self._parse_var() 2101 bucket_numerator = None 2102 bucket_denominator = None 2103 bucket_field = None 2104 percent = None 2105 rows = None 2106 size = None 2107 seed = None 2108 2109 self._match_l_paren() 2110 2111 if self._match(TokenType.BUCKET): 2112 bucket_numerator = self._parse_number() 2113 self._match(TokenType.OUT_OF) 2114 bucket_denominator = bucket_denominator = self._parse_number() 2115 self._match(TokenType.ON) 2116 bucket_field = self._parse_field() 2117 else: 2118 num = self._parse_number() 2119 2120 if self._match(TokenType.PERCENT): 2121 percent = num 2122 elif self._match(TokenType.ROWS): 2123 rows = num 2124 else: 2125 size = num 2126 2127 self._match_r_paren() 2128 
2129 if self._match(TokenType.SEED): 2130 seed = self._parse_wrapped(self._parse_number) 2131 2132 return self.expression( 2133 exp.TableSample, 2134 method=method, 2135 bucket_numerator=bucket_numerator, 2136 bucket_denominator=bucket_denominator, 2137 bucket_field=bucket_field, 2138 percent=percent, 2139 rows=rows, 2140 size=size, 2141 seed=seed, 2142 ) 2143 2144 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2145 return list(iter(self._parse_pivot, None)) 2146 2147 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2148 index = self._index 2149 2150 if self._match(TokenType.PIVOT): 2151 unpivot = False 2152 elif self._match(TokenType.UNPIVOT): 2153 unpivot = True 2154 else: 2155 return None 2156 2157 expressions = [] 2158 field = None 2159 2160 if not self._match(TokenType.L_PAREN): 2161 self._retreat(index) 2162 return None 2163 2164 if unpivot: 2165 expressions = self._parse_csv(self._parse_column) 2166 else: 2167 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2168 2169 if not self._match(TokenType.FOR): 2170 self.raise_error("Expecting FOR") 2171 2172 value = self._parse_column() 2173 2174 if not self._match(TokenType.IN): 2175 self.raise_error("Expecting IN") 2176 2177 field = self._parse_in(value) 2178 2179 self._match_r_paren() 2180 2181 return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2182 2183 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2184 if not skip_where_token and not self._match(TokenType.WHERE): 2185 return None 2186 2187 return self.expression( 2188 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2189 ) 2190 2191 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2192 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2193 return None 2194 2195 elements = defaultdict(list) 2196 2197 while True: 2198 expressions = 
self._parse_csv(self._parse_conjunction) 2199 if expressions: 2200 elements["expressions"].extend(expressions) 2201 2202 grouping_sets = self._parse_grouping_sets() 2203 if grouping_sets: 2204 elements["grouping_sets"].extend(grouping_sets) 2205 2206 rollup = None 2207 cube = None 2208 2209 with_ = self._match(TokenType.WITH) 2210 if self._match(TokenType.ROLLUP): 2211 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2212 elements["rollup"].extend(ensure_list(rollup)) 2213 2214 if self._match(TokenType.CUBE): 2215 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2216 elements["cube"].extend(ensure_list(cube)) 2217 2218 if not (expressions or grouping_sets or rollup or cube): 2219 break 2220 2221 return self.expression(exp.Group, **elements) # type: ignore 2222 2223 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2224 if not self._match(TokenType.GROUPING_SETS): 2225 return None 2226 2227 return self._parse_wrapped_csv(self._parse_grouping_set) 2228 2229 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2230 if self._match(TokenType.L_PAREN): 2231 grouping_set = self._parse_csv(self._parse_column) 2232 self._match_r_paren() 2233 return self.expression(exp.Tuple, expressions=grouping_set) 2234 2235 return self._parse_column() 2236 2237 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2238 if not skip_having_token and not self._match(TokenType.HAVING): 2239 return None 2240 return self.expression(exp.Having, this=self._parse_conjunction()) 2241 2242 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2243 if not self._match(TokenType.QUALIFY): 2244 return None 2245 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2246 2247 def _parse_order( 2248 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2249 ) -> t.Optional[exp.Expression]: 2250 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2251 return 
this 2252 2253 return self.expression( 2254 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2255 ) 2256 2257 def _parse_sort( 2258 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2259 ) -> t.Optional[exp.Expression]: 2260 if not self._match(token_type): 2261 return None 2262 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2263 2264 def _parse_ordered(self) -> exp.Expression: 2265 this = self._parse_conjunction() 2266 self._match(TokenType.ASC) 2267 is_desc = self._match(TokenType.DESC) 2268 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2269 is_nulls_last = self._match(TokenType.NULLS_LAST) 2270 desc = is_desc or False 2271 asc = not desc 2272 nulls_first = is_nulls_first or False 2273 explicitly_null_ordered = is_nulls_first or is_nulls_last 2274 if ( 2275 not explicitly_null_ordered 2276 and ( 2277 (asc and self.null_ordering == "nulls_are_small") 2278 or (desc and self.null_ordering != "nulls_are_small") 2279 ) 2280 and self.null_ordering != "nulls_are_last" 2281 ): 2282 nulls_first = True 2283 2284 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2285 2286 def _parse_limit( 2287 self, this: t.Optional[exp.Expression] = None, top: bool = False 2288 ) -> t.Optional[exp.Expression]: 2289 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2290 limit_paren = self._match(TokenType.L_PAREN) 2291 limit_exp = self.expression( 2292 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2293 ) 2294 2295 if limit_paren: 2296 self._match_r_paren() 2297 2298 return limit_exp 2299 2300 if self._match(TokenType.FETCH): 2301 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2302 direction = self._prev.text if direction else "FIRST" 2303 count = self._parse_number() 2304 self._match_set((TokenType.ROW, TokenType.ROWS)) 2305 self._match(TokenType.ONLY) 2306 return self.expression(exp.Fetch, direction=direction, 
count=count) 2307 2308 return this 2309 2310 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2311 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2312 return this 2313 2314 count = self._parse_number() 2315 self._match_set((TokenType.ROW, TokenType.ROWS)) 2316 return self.expression(exp.Offset, this=this, expression=count) 2317 2318 def _parse_lock(self) -> t.Optional[exp.Expression]: 2319 if self._match_text_seq("FOR", "UPDATE"): 2320 return self.expression(exp.Lock, update=True) 2321 if self._match_text_seq("FOR", "SHARE"): 2322 return self.expression(exp.Lock, update=False) 2323 2324 return None 2325 2326 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2327 if not self._match_set(self.SET_OPERATIONS): 2328 return this 2329 2330 token_type = self._prev.token_type 2331 2332 if token_type == TokenType.UNION: 2333 expression = exp.Union 2334 elif token_type == TokenType.EXCEPT: 2335 expression = exp.Except 2336 else: 2337 expression = exp.Intersect 2338 2339 return self.expression( 2340 expression, 2341 this=this, 2342 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2343 expression=self._parse_set_operations(self._parse_select(nested=True)), 2344 ) 2345 2346 def _parse_expression(self) -> t.Optional[exp.Expression]: 2347 return self._parse_alias(self._parse_conjunction()) 2348 2349 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2350 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2351 2352 def _parse_equality(self) -> t.Optional[exp.Expression]: 2353 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2354 2355 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2356 return self._parse_tokens(self._parse_range, self.COMPARISON) 2357 2358 def _parse_range(self) -> t.Optional[exp.Expression]: 2359 this = self._parse_bitwise() 2360 negate = self._match(TokenType.NOT) 2361 2362 if 
self._match_set(self.RANGE_PARSERS): 2363 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2364 elif self._match(TokenType.ISNULL): 2365 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2366 2367 # Postgres supports ISNULL and NOTNULL for conditions. 2368 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2369 if self._match(TokenType.NOTNULL): 2370 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2371 this = self.expression(exp.Not, this=this) 2372 2373 if negate: 2374 this = self.expression(exp.Not, this=this) 2375 2376 if self._match(TokenType.IS): 2377 this = self._parse_is(this) 2378 2379 return this 2380 2381 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2382 negate = self._match(TokenType.NOT) 2383 if self._match(TokenType.DISTINCT_FROM): 2384 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2385 return self.expression(klass, this=this, expression=self._parse_expression()) 2386 2387 this = self.expression( 2388 exp.Is, 2389 this=this, 2390 expression=self._parse_null() or self._parse_boolean(), 2391 ) 2392 return self.expression(exp.Not, this=this) if negate else this 2393 2394 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2395 unnest = self._parse_unnest() 2396 if unnest: 2397 this = self.expression(exp.In, this=this, unnest=unnest) 2398 elif self._match(TokenType.L_PAREN): 2399 expressions = self._parse_csv(self._parse_select_or_expression) 2400 2401 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2402 this = self.expression(exp.In, this=this, query=expressions[0]) 2403 else: 2404 this = self.expression(exp.In, this=this, expressions=expressions) 2405 2406 self._match_r_paren() 2407 else: 2408 this = self.expression(exp.In, this=this, field=self._parse_field()) 2409 2410 return this 2411 2412 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2413 low = self._parse_bitwise() 2414 
self._match(TokenType.AND) 2415 high = self._parse_bitwise() 2416 return self.expression(exp.Between, this=this, low=low, high=high) 2417 2418 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2419 if not self._match(TokenType.ESCAPE): 2420 return this 2421 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2422 2423 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2424 this = self._parse_term() 2425 2426 while True: 2427 if self._match_set(self.BITWISE): 2428 this = self.expression( 2429 self.BITWISE[self._prev.token_type], 2430 this=this, 2431 expression=self._parse_term(), 2432 ) 2433 elif self._match_pair(TokenType.LT, TokenType.LT): 2434 this = self.expression( 2435 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2436 ) 2437 elif self._match_pair(TokenType.GT, TokenType.GT): 2438 this = self.expression( 2439 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2440 ) 2441 else: 2442 break 2443 2444 return this 2445 2446 def _parse_term(self) -> t.Optional[exp.Expression]: 2447 return self._parse_tokens(self._parse_factor, self.TERM) 2448 2449 def _parse_factor(self) -> t.Optional[exp.Expression]: 2450 return self._parse_tokens(self._parse_unary, self.FACTOR) 2451 2452 def _parse_unary(self) -> t.Optional[exp.Expression]: 2453 if self._match_set(self.UNARY_PARSERS): 2454 return self.UNARY_PARSERS[self._prev.token_type](self) 2455 return self._parse_at_time_zone(self._parse_type()) 2456 2457 def _parse_type(self) -> t.Optional[exp.Expression]: 2458 if self._match(TokenType.INTERVAL): 2459 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2460 2461 index = self._index 2462 type_token = self._parse_types(check_func=True) 2463 this = self._parse_column() 2464 2465 if type_token: 2466 if this and not isinstance(this, exp.Star): 2467 return self.expression(exp.Cast, this=this, to=type_token) 2468 if not 
type_token.args.get("expressions"): 2469 self._retreat(index) 2470 return self._parse_column() 2471 return type_token 2472 2473 return this 2474 2475 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2476 index = self._index 2477 2478 prefix = self._match_text_seq("SYSUDTLIB", ".") 2479 2480 if not self._match_set(self.TYPE_TOKENS): 2481 return None 2482 2483 type_token = self._prev.token_type 2484 2485 if type_token == TokenType.PSEUDO_TYPE: 2486 return self.expression(exp.PseudoType, this=self._prev.text) 2487 2488 nested = type_token in self.NESTED_TYPE_TOKENS 2489 is_struct = type_token == TokenType.STRUCT 2490 expressions = None 2491 maybe_func = False 2492 2493 if self._match(TokenType.L_PAREN): 2494 if is_struct: 2495 expressions = self._parse_csv(self._parse_struct_kwargs) 2496 elif nested: 2497 expressions = self._parse_csv(self._parse_types) 2498 else: 2499 expressions = self._parse_csv(self._parse_conjunction) 2500 2501 if not expressions: 2502 self._retreat(index) 2503 return None 2504 2505 self._match_r_paren() 2506 maybe_func = True 2507 2508 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2509 this = exp.DataType( 2510 this=exp.DataType.Type.ARRAY, 2511 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2512 nested=True, 2513 ) 2514 2515 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2516 this = exp.DataType( 2517 this=exp.DataType.Type.ARRAY, 2518 expressions=[this], 2519 nested=True, 2520 ) 2521 2522 return this 2523 2524 if self._match(TokenType.L_BRACKET): 2525 self._retreat(index) 2526 return None 2527 2528 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2529 if nested and self._match(TokenType.LT): 2530 if is_struct: 2531 expressions = self._parse_csv(self._parse_struct_kwargs) 2532 else: 2533 expressions = self._parse_csv(self._parse_types) 2534 2535 if not self._match(TokenType.GT): 2536 self.raise_error("Expecting >") 2537 
2538 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2539 values = self._parse_csv(self._parse_conjunction) 2540 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2541 2542 value: t.Optional[exp.Expression] = None 2543 if type_token in self.TIMESTAMPS: 2544 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2545 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2546 elif ( 2547 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2548 ): 2549 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2550 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2551 if type_token == TokenType.TIME: 2552 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2553 else: 2554 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2555 2556 maybe_func = maybe_func and value is None 2557 2558 if value is None: 2559 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2560 elif type_token == TokenType.INTERVAL: 2561 value = self.expression(exp.Interval, unit=self._parse_var()) 2562 2563 if maybe_func and check_func: 2564 index2 = self._index 2565 peek = self._parse_string() 2566 2567 if not peek: 2568 self._retreat(index) 2569 return None 2570 2571 self._retreat(index2) 2572 2573 if value: 2574 return value 2575 2576 return exp.DataType( 2577 this=exp.DataType.Type[type_token.value.upper()], 2578 expressions=expressions, 2579 nested=nested, 2580 values=values, 2581 prefix=prefix, 2582 ) 2583 2584 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2585 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2586 return self._parse_types() 2587 2588 this = self._parse_id_var() 2589 self._match(TokenType.COLON) 2590 data_type = self._parse_types() 2591 2592 if not data_type: 2593 return None 2594 return self.expression(exp.StructKwarg, this=this, 
expression=data_type) 2595 2596 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2597 if not self._match(TokenType.AT_TIME_ZONE): 2598 return this 2599 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2600 2601 def _parse_column(self) -> t.Optional[exp.Expression]: 2602 this = self._parse_field() 2603 if isinstance(this, exp.Identifier): 2604 this = self.expression(exp.Column, this=this) 2605 elif not this: 2606 return self._parse_bracket(this) 2607 this = self._parse_bracket(this) 2608 2609 while self._match_set(self.COLUMN_OPERATORS): 2610 op_token = self._prev.token_type 2611 op = self.COLUMN_OPERATORS.get(op_token) 2612 2613 if op_token == TokenType.DCOLON: 2614 field = self._parse_types() 2615 if not field: 2616 self.raise_error("Expected type") 2617 elif op: 2618 self._advance() 2619 value = self._prev.text 2620 field = ( 2621 exp.Literal.number(value) 2622 if self._prev.token_type == TokenType.NUMBER 2623 else exp.Literal.string(value) 2624 ) 2625 else: 2626 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2627 2628 if isinstance(field, exp.Func): 2629 # bigquery allows function calls like x.y.count(...) 2630 # SAFE.SUBSTR(...) 
2631 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2632 this = self._replace_columns_with_dots(this) 2633 2634 if op: 2635 this = op(self, this, field) 2636 elif isinstance(this, exp.Column) and not this.args.get("schema"): 2637 this = self.expression( 2638 exp.Column, this=field, table=this.this, schema=this.args.get("table") 2639 ) 2640 else: 2641 this = self.expression(exp.Dot, this=this, expression=field) 2642 this = self._parse_bracket(this) 2643 2644 return this 2645 2646 def _parse_primary(self) -> t.Optional[exp.Expression]: 2647 if self._match_set(self.PRIMARY_PARSERS): 2648 token_type = self._prev.token_type 2649 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2650 2651 if token_type == TokenType.STRING: 2652 expressions = [primary] 2653 while self._match(TokenType.STRING): 2654 expressions.append(exp.Literal.string(self._prev.text)) 2655 if len(expressions) > 1: 2656 return self.expression(exp.Concat, expressions=expressions) 2657 return primary 2658 2659 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2660 return exp.Literal.number(f"0.{self._prev.text}") 2661 2662 if self._match(TokenType.L_PAREN): 2663 comments = self._prev_comments 2664 query = self._parse_select() 2665 2666 if query: 2667 expressions = [query] 2668 else: 2669 expressions = self._parse_csv( 2670 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2671 ) 2672 2673 this = seq_get(expressions, 0) 2674 self._parse_query_modifiers(this) 2675 self._match_r_paren() 2676 2677 if isinstance(this, exp.Subqueryable): 2678 this = self._parse_set_operations( 2679 self._parse_subquery(this=this, parse_alias=False) 2680 ) 2681 elif len(expressions) > 1: 2682 this = self.expression(exp.Tuple, expressions=expressions) 2683 else: 2684 this = self.expression(exp.Paren, this=this) 2685 2686 if this and comments: 2687 this.comments = comments 2688 2689 return this 2690 2691 return None 2692 2693 def 
_parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2694 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2695 2696 def _parse_function( 2697 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2698 ) -> t.Optional[exp.Expression]: 2699 if not self._curr: 2700 return None 2701 2702 token_type = self._curr.token_type 2703 2704 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2705 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2706 2707 if not self._next or self._next.token_type != TokenType.L_PAREN: 2708 if token_type in self.NO_PAREN_FUNCTIONS: 2709 self._advance() 2710 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2711 2712 return None 2713 2714 if token_type not in self.FUNC_TOKENS: 2715 return None 2716 2717 this = self._curr.text 2718 upper = this.upper() 2719 self._advance(2) 2720 2721 parser = self.FUNCTION_PARSERS.get(upper) 2722 2723 if parser: 2724 this = parser(self) 2725 else: 2726 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2727 2728 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2729 this = self.expression(subquery_predicate, this=self._parse_select()) 2730 self._match_r_paren() 2731 return this 2732 2733 if functions is None: 2734 functions = self.FUNCTIONS 2735 2736 function = functions.get(upper) 2737 args = self._parse_csv(self._parse_lambda) 2738 2739 if function: 2740 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2741 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
2742 if count_params(function) == 2: 2743 params = None 2744 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2745 params = self._parse_csv(self._parse_lambda) 2746 2747 this = function(args, params) 2748 else: 2749 this = function(args) 2750 2751 self.validate_expression(this, args) 2752 else: 2753 this = self.expression(exp.Anonymous, this=this, expressions=args) 2754 2755 self._match_r_paren(this) 2756 return self._parse_window(this) 2757 2758 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2759 return self._parse_column_def(self._parse_id_var()) 2760 2761 def _parse_user_defined_function( 2762 self, kind: t.Optional[TokenType] = None 2763 ) -> t.Optional[exp.Expression]: 2764 this = self._parse_id_var() 2765 2766 while self._match(TokenType.DOT): 2767 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2768 2769 if not self._match(TokenType.L_PAREN): 2770 return this 2771 2772 expressions = self._parse_csv(self._parse_function_parameter) 2773 self._match_r_paren() 2774 return self.expression( 2775 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2776 ) 2777 2778 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2779 literal = self._parse_primary() 2780 if literal: 2781 return self.expression(exp.Introducer, this=token.text, expression=literal) 2782 2783 return self.expression(exp.Identifier, this=token.text) 2784 2785 def _parse_national(self, token: Token) -> exp.Expression: 2786 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2787 2788 def _parse_session_parameter(self) -> exp.Expression: 2789 kind = None 2790 this = self._parse_id_var() or self._parse_primary() 2791 2792 if this and self._match(TokenType.DOT): 2793 kind = this.name 2794 this = self._parse_var() or self._parse_primary() 2795 2796 return self.expression(exp.SessionParameter, this=this, kind=kind) 2797 2798 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2799 
index = self._index 2800 2801 if self._match(TokenType.L_PAREN): 2802 expressions = self._parse_csv(self._parse_id_var) 2803 2804 if not self._match(TokenType.R_PAREN): 2805 self._retreat(index) 2806 else: 2807 expressions = [self._parse_id_var()] 2808 2809 if self._match_set(self.LAMBDAS): 2810 return self.LAMBDAS[self._prev.token_type](self, expressions) 2811 2812 self._retreat(index) 2813 2814 this: t.Optional[exp.Expression] 2815 2816 if self._match(TokenType.DISTINCT): 2817 this = self.expression( 2818 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2819 ) 2820 else: 2821 this = self._parse_select_or_expression() 2822 2823 if self._match(TokenType.IGNORE_NULLS): 2824 this = self.expression(exp.IgnoreNulls, this=this) 2825 else: 2826 self._match(TokenType.RESPECT_NULLS) 2827 2828 return self._parse_limit(self._parse_order(this)) 2829 2830 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2831 index = self._index 2832 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2833 self._retreat(index) 2834 return this 2835 2836 args = self._parse_csv( 2837 lambda: self._parse_constraint() 2838 or self._parse_column_def(self._parse_field(any_token=True)) 2839 ) 2840 self._match_r_paren() 2841 return self.expression(exp.Schema, this=this, expressions=args) 2842 2843 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2844 kind = self._parse_types() 2845 2846 if self._match_text_seq("FOR", "ORDINALITY"): 2847 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2848 2849 constraints = [] 2850 while True: 2851 constraint = self._parse_column_constraint() 2852 if not constraint: 2853 break 2854 constraints.append(constraint) 2855 2856 if not kind and not constraints: 2857 return this 2858 2859 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2860 2861 def _parse_auto_increment(self) -> exp.Expression: 
2862 start = None 2863 increment = None 2864 2865 if self._match(TokenType.L_PAREN, advance=False): 2866 args = self._parse_wrapped_csv(self._parse_bitwise) 2867 start = seq_get(args, 0) 2868 increment = seq_get(args, 1) 2869 elif self._match_text_seq("START"): 2870 start = self._parse_bitwise() 2871 self._match_text_seq("INCREMENT") 2872 increment = self._parse_bitwise() 2873 2874 if start and increment: 2875 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2876 2877 return exp.AutoIncrementColumnConstraint() 2878 2879 def _parse_generated_as_identity(self) -> exp.Expression: 2880 if self._match(TokenType.BY_DEFAULT): 2881 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2882 else: 2883 self._match_text_seq("ALWAYS") 2884 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2885 2886 self._match_text_seq("AS", "IDENTITY") 2887 if self._match(TokenType.L_PAREN): 2888 if self._match_text_seq("START", "WITH"): 2889 this.set("start", self._parse_bitwise()) 2890 if self._match_text_seq("INCREMENT", "BY"): 2891 this.set("increment", self._parse_bitwise()) 2892 if self._match_text_seq("MINVALUE"): 2893 this.set("minvalue", self._parse_bitwise()) 2894 if self._match_text_seq("MAXVALUE"): 2895 this.set("maxvalue", self._parse_bitwise()) 2896 2897 if self._match_text_seq("CYCLE"): 2898 this.set("cycle", True) 2899 elif self._match_text_seq("NO", "CYCLE"): 2900 this.set("cycle", False) 2901 2902 self._match_r_paren() 2903 2904 return this 2905 2906 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2907 if self._match_text_seq("NULL"): 2908 return self.expression(exp.NotNullColumnConstraint) 2909 if self._match_text_seq("CASESPECIFIC"): 2910 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2911 return None 2912 2913 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2914 this = self._parse_references() 2915 if this: 2916 return this 2917 2918 if 
# NOTE(review): this chunk begins mid-method — the lines below are the tail of a
# column-constraint parsing helper whose `def` line lies above this view.  The
# leading `if` keyword was cut off by the extraction and is reconstructed here.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; named (CONSTRAINT <name> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint()

        this = self._parse_id_var()
        expressions = []

        # Greedily collect every constraint body (or function call) after the name.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]:
        """Dispatch to the registered parser for the constraint keyword at the cursor."""
        if not self._match_texts(self.CONSTRAINT_PARSERS):
            return None
        return self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE either as a bare column constraint or as UNIQUE (col, ...)."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] [options]; None if REFERENCES is absent."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or as PRIMARY KEY (cols) [options]."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon means an open-start slice, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts against the dialect's array index base.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to support chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows (e.g. x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any window suffix."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(a, b, c) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(<part> FROM <expr>) (a comma is tolerated in place of FROM)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` picks exp.Cast over exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR CHARACTER SET <charset> form.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG in its Postgres, MySQL/SQLite, and WITHIN GROUP variants into GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset | expr, type) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated call form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join-hint pseudo-function's table arguments into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        """Parse SUBSTRING in both call-style and FROM/FOR style."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE ...])."""
        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: first term is the removal set, second the target.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes (FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS,
        OVER (...)) around `this`; `alias` handles named-window definitions."""
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window-name> form (no parenthesized spec).
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame endpoint (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <n> FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; `explicit` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like token (keywords usable as names)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or fall back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, or fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token if `any_token`), else a placeholder."""
        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or, failing that, a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse the NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE or FALSE."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse the star (*) projection token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind one token on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse the column list of an EXCEPT (...) projection modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse the expression list of a REPLACE (...) projection modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, attaching separator comments."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: map matched operator tokens to expression types."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a required pair of parentheses."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a scalar expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (plus set operations) that feeds a DDL statement."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens (e.g. "ISOLATION LEVEL ...").
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is parsed but only attached to Commit; a parsed
        # AND [NO] CHAIN on ROLLBACK is discarded here — confirm this is intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> in ALTER TABLE."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse DROP [COLUMN] ... in ALTER TABLE."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse DROP [IF EXISTS] PARTITION (...), (...) in ALTER TABLE."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY in ALTER TABLE."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # NOTE(review): if neither FOREIGN KEY nor PRIMARY KEY follows, `expression`
        # is never bound and the final line raises UnboundLocalError — verify
        # callers only reach this with ADD_CONSTRAINT_TOKENS matched.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE with DELETE/ADD/DROP/RENAME TO/ALTER COLUMN actions."""
        if not self._match(TokenType.TABLE):
            # Anything other than ALTER TABLE is kept verbatim as a Command.
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW_PARSERS trie; unknown forms become exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET item parser: wrap whatever statement follows."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the dialect's SET_PARSERS trie, else the default."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)

            whens.append(self.expression(exp.When, this=this, then=then))

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement as a comma-separated list of SET items."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an opaque Command."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Longest-prefix match the upcoming tokens against `trie`; return the parser
        registered for the matched key, rewinding the cursor if nothing matches."""
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        """Return True (advancing unless advance=False) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types):
        """Return True and advance if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True if the next two tokens match the given pair (advancing both unless advance=False)."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require a '(' token; attach any comments on it to `expression`."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression=None):
        """Require a ')' token; attach any comments on it to `expression`."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts):
        """Return True and advance if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match the exact sequence of upper-cased words; rewind fully on any miss."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Rewrite Column/Identifier nodes into Dot/Var chains (used for dotted rewrites)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Unwrap Column nodes whose names are lambda parameters to their inner identifiers."""
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
43class Parser(metaclass=_Parser): 44 """ 45 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 46 a parsed syntax tree. 47 48 Args: 49 error_level: the desired error level. 50 Default: ErrorLevel.RAISE 51 error_message_context: determines the amount of context to capture from a 52 query string when displaying the error message (in number of characters). 53 Default: 50. 54 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 55 Default: 0 56 alias_post_tablesample: If the table alias comes after tablesample. 57 Default: False 58 max_errors: Maximum number of error messages to include in a raised ParseError. 59 This is only relevant if error_level is ErrorLevel.RAISE. 60 Default: 3 61 null_ordering: Indicates the default null ordering method to use if not explicitly set. 62 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 63 Default: "nulls_are_small" 64 """ 65 66 FUNCTIONS: t.Dict[str, t.Callable] = { 67 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 68 "DATE_TO_DATE_STR": lambda args: exp.Cast( 69 this=seq_get(args, 0), 70 to=exp.DataType(this=exp.DataType.Type.TEXT), 71 ), 72 "TIME_TO_TIME_STR": lambda args: exp.Cast( 73 this=seq_get(args, 0), 74 to=exp.DataType(this=exp.DataType.Type.TEXT), 75 ), 76 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 77 this=exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 start=exp.Literal.number(1), 82 length=exp.Literal.number(10), 83 ), 84 "VAR_MAP": parse_var_map, 85 "IFNULL": exp.Coalesce.from_arg_list, 86 } 87 88 NO_PAREN_FUNCTIONS = { 89 TokenType.CURRENT_DATE: exp.CurrentDate, 90 TokenType.CURRENT_DATETIME: exp.CurrentDate, 91 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 92 } 93 94 NESTED_TYPE_TOKENS = { 95 TokenType.ARRAY, 96 TokenType.MAP, 97 TokenType.STRUCT, 98 TokenType.NULLABLE, 99 } 100 101 TYPE_TOKENS = { 102 TokenType.BOOLEAN, 103 
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate (e.g. `x = ANY (SELECT ...)`),
    # mapped to the expression class they produce.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,  # SOME is a synonym of ANY
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Keyword tokens that may still be used as identifiers (e.g. a column named
    # "filter"); includes all type tokens and no-paren function tokens.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens usable as a table alias: identifier-like tokens minus the ones that
    # would be ambiguous immediately after a table expression (join sides etc.).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # UPDATE aliases additionally exclude SET, which starts the assignment list.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that can appear as a function name when followed by parentheses.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator token -> expression-class tables, one table per
    # precedence tier (conjunction < equality < comparison < bitwise < term < factor).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda-arrow tokens: `->` builds an exp.Lambda (rewriting references to the
    # lambda's own parameters), `=>` builds a keyword argument (exp.Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that may follow a column expression; DOT is handled
    # specially by the caller (hence None).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser entry point; used by parse_into to parse a
    # token stream directly into a specific node type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> handler; consulted first by _parse_statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator token -> handler.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary-expression token -> handler; each receives the token
    # that triggered it (the `_` handlers ignore it).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter style token -> handler (`?`, `@param`, `:name` / `:1`).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operator token -> handler; `this` is the already-parsed
    # left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # Property keyword (upper-cased text) -> handler, used while parsing
    # CREATE ... properties. Several handlers inspect self._prev, so the
    # keyword must already have been consumed when they run.
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
    }

    # Column-constraint keyword (upper-cased text) -> handler, used while
    # parsing column definitions inside CREATE TABLE / ALTER TABLE.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Function-like constructs that are used without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self:
        self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Function names (upper-cased) with special call syntax that cannot be
    # handled by the generic from_arg_list machinery.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Query-modifier slot name -> parser; applied after a SELECT-like body.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Dialect hooks for SHOW / SET statements; empty here, populated by
    # subclasses (the _Parser metaclass builds tries from the keys).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted after CREATE / DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST raises on failure; dialects override (e.g. for TRY_CAST semantics).
    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring advertises ErrorLevel.RAISE and an
        # error_message_context of 50 as defaults, but the code below uses
        # IMMEDIATE and 100 — confirm which is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state (SQL text, errors, and token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs parse_method on each chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escapes in the message.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Comments captured from the previously consumed token are attached
        # here, then cleared so they are not attached twice.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens."""
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        """Converts a token's (line, col) position into a character offset into self.sql."""
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward, refreshing _curr/_next/_prev and pending comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to an absolute index (e.g. to undo speculative parsing)."""
        self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        """Wraps an unparsed statement as an opaque Command node (text passthrough)."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry: dispatches on the leading token, falling back to an expression/SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                # Unknown object kind: fall back to an opaque Command.
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only if fully matched."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parses CREATE [OR REPLACE] <modifiers> <kind> ..., including Teradata extensions."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        set_ = self._match(TokenType.SET)  # Teradata
        multiset = self._match_text_seq("MULTISET")  # Teradata
        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
        volatile = self._match(TokenType.VOLATILE)  # Teradata
        temporary = self._match(TokenType.TEMPORARY)
        transient = self._match_text_seq("TRANSIENT")
        external = self._match_text_seq("EXTERNAL")
        unique = self._match(TokenType.UNIQUE)
        materialized = self._match(TokenType.MATERIALIZED)

        if self._match_pair(TokenType.TABLE,
TokenType.FUNCTION, advance=False): 990 self._match(TokenType.TABLE) 991 992 properties = None 993 create_token = self._match_set(self.CREATABLES) and self._prev 994 995 if not create_token: 996 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 997 create_token = self._match_set(self.CREATABLES) and self._prev 998 999 if not properties or not create_token: 1000 return self._parse_as_command(start) 1001 1002 exists = self._parse_exists(not_=True) 1003 this = None 1004 expression = None 1005 data = None 1006 statistics = None 1007 no_primary_index = None 1008 indexes = None 1009 no_schema_binding = None 1010 begin = None 1011 1012 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1013 this = self._parse_user_defined_function(kind=create_token.token_type) 1014 properties = self._parse_properties() 1015 1016 self._match(TokenType.ALIAS) 1017 begin = self._match(TokenType.BEGIN) 1018 return_ = self._match_text_seq("RETURN") 1019 expression = self._parse_statement() 1020 1021 if return_: 1022 expression = self.expression(exp.Return, this=expression) 1023 elif create_token.token_type == TokenType.INDEX: 1024 this = self._parse_index() 1025 elif create_token.token_type in ( 1026 TokenType.TABLE, 1027 TokenType.VIEW, 1028 TokenType.SCHEMA, 1029 ): 1030 table_parts = self._parse_table_parts(schema=True) 1031 1032 # exp.Properties.Location.POST_NAME 1033 if self._match(TokenType.COMMA): 1034 temp_properties = self._parse_properties(before=True) 1035 if properties and temp_properties: 1036 properties.expressions.append(temp_properties.expressions) 1037 elif temp_properties: 1038 properties = temp_properties 1039 1040 this = self._parse_schema(this=table_parts) 1041 1042 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1043 temp_properties = self._parse_properties() 1044 if properties and temp_properties: 1045 properties.expressions.append(temp_properties.expressions) 1046 elif temp_properties: 1047 properties = 
temp_properties 1048 1049 self._match(TokenType.ALIAS) 1050 1051 # exp.Properties.Location.POST_ALIAS 1052 if not ( 1053 self._match(TokenType.SELECT, advance=False) 1054 or self._match(TokenType.WITH, advance=False) 1055 or self._match(TokenType.L_PAREN, advance=False) 1056 ): 1057 temp_properties = self._parse_properties() 1058 if properties and temp_properties: 1059 properties.expressions.append(temp_properties.expressions) 1060 elif temp_properties: 1061 properties = temp_properties 1062 1063 expression = self._parse_ddl_select() 1064 1065 if create_token.token_type == TokenType.TABLE: 1066 if self._match_text_seq("WITH", "DATA"): 1067 data = True 1068 elif self._match_text_seq("WITH", "NO", "DATA"): 1069 data = False 1070 1071 if self._match_text_seq("AND", "STATISTICS"): 1072 statistics = True 1073 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1074 statistics = False 1075 1076 no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX") 1077 1078 indexes = [] 1079 while True: 1080 index = self._parse_create_table_index() 1081 1082 # exp.Properties.Location.POST_INDEX 1083 if self._match(TokenType.PARTITION_BY, advance=False): 1084 temp_properties = self._parse_properties() 1085 if properties and temp_properties: 1086 properties.expressions.append(temp_properties.expressions) 1087 elif temp_properties: 1088 properties = temp_properties 1089 1090 if not index: 1091 break 1092 else: 1093 indexes.append(index) 1094 elif create_token.token_type == TokenType.VIEW: 1095 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1096 no_schema_binding = True 1097 1098 return self.expression( 1099 exp.Create, 1100 this=this, 1101 kind=create_token.text, 1102 expression=expression, 1103 set=set_, 1104 multiset=multiset, 1105 global_temporary=global_temporary, 1106 volatile=volatile, 1107 exists=exists, 1108 properties=properties, 1109 temporary=temporary, 1110 transient=transient, 1111 external=external, 1112 replace=replace, 1113 unique=unique, 1114 
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses one comma-separated property occurring before the schema (Teradata style)."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): self._curr may be None at end of input here, which would
        # raise AttributeError — confirm callers guarantee a current token.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: known keyword, special pairs, or a `key = value` assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `identifier = value` / `'string' = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parses an optional `=`/`AS` followed by a value into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Collects consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A parser may return a single node or a collection of nodes.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        """Teradata: FALLBACK [PROTECTION]; `no` reflects a preceding NO."""
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses what follows WITH: a wrapped property list, journaling, or isolated loading."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """MySQL: DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Teradata: WITH JOURNAL TABLE = <table>."""
        self._match_text_seq("WITH", "JOURNAL", "TABLE")
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        """Teradata: [NO] LOG."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Teradata: [NO|DUAL] [BEFORE] JOURNAL."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Teradata: [NO|DUAL] [NOT] [LOCAL] AFTER JOURNAL."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Teradata: CHECKSUM = ON|OFF|DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Teradata: FREESPACE = <number> [PERCENT]."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Teradata: MERGEBLOCKRATIO [= <number> [PERCENT]] | NO/DEFAULT MERGEBLOCKRATIO."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Teradata: [DEFAULT|MINIMUM|MAXIMUM] DATABLOCKSIZE [= <size> [units]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty,
                min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        # Optional size unit keyword, e.g. BYTES / KBYTES / KILOBYTES.
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse Teradata `BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]`."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse Teradata `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse a Teradata LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry an explicit target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `PARTITION BY <exprs>`; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse `PARTITIONED BY [=] (<schema>|<field>)` (Hive-style)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_distkey(self) -> exp.Expression:
        """Parse Redshift `DISTKEY(<column>)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]*`; None on a malformed option."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse Redshift `[COMPOUND] SORTKEY(<cols>)`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse `CHARACTER SET [=] <name>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: either a scalar type or a TABLE shape (`TABLE<...>` / `TABLE(...)`)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # BigQuery-style TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        """Parse `DESCRIBE [<creatable kind>] <table>`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (the INSERT keyword is already consumed).

        Handles `INSERT [OVERWRITE] [LOCAL] DIRECTORY ...` (Hive) as well as
        `INSERT [OR <alternative>] [INTO] [TABLE] <table> ... <select>`.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. sqlite INSERT OR REPLACE/IGNORE/...
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse `ROW FORMAT ...` when positioned just after ROW; None otherwise."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive row format: SERDE '<class>' or DELIMITED with its options.

        Args:
            match_row: when True, require (and consume) a leading `ROW FORMAT`.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parse Hive `LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...`."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement (DELETE keyword already consumed)."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement: target, SET assignments, FROM, WHERE."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse Spark `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse Spark `CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        # A single ('key' = 'value') pair is supported inside OPTIONS.
        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse `PARTITION (<exprs>)`; None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse a single VALUES row as an exp.Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized subquery, including a leading WITH.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table reference inside parentheses.
            parse_subquery_alias: whether a subquery may take a trailing alias.

        Returns the parsed expression (set operations applied), or None.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so
            # that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: `<alias> [(<cols>)] AS (<statement>)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse `[AS] <alias> [(<columns>)]`; None when neither alias nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis didn't introduce a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in an exp.Subquery, with optional pivots and alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach laterals, joins, comma-joined tables and trailing clauses to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Bare tables don't take comma-separated FROM extensions.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # WHERE/GROUP/HAVING/etc., each handled by its registered parser.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse an optimizer hint comment `/*+ ... */` into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        """Parse a FROM clause with its comma-separated tables."""
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1795 ) 1796 1797 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1798 if not self._match(TokenType.MATCH_RECOGNIZE): 1799 return None 1800 self._match_l_paren() 1801 1802 partition = self._parse_partition_by() 1803 order = self._parse_order() 1804 measures = ( 1805 self._parse_alias(self._parse_conjunction()) 1806 if self._match_text_seq("MEASURES") 1807 else None 1808 ) 1809 1810 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1811 rows = exp.Var(this="ONE ROW PER MATCH") 1812 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1813 text = "ALL ROWS PER MATCH" 1814 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1815 text += f" SHOW EMPTY MATCHES" 1816 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1817 text += f" OMIT EMPTY MATCHES" 1818 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1819 text += f" WITH UNMATCHED ROWS" 1820 rows = exp.Var(this=text) 1821 else: 1822 rows = None 1823 1824 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1825 text = "AFTER MATCH SKIP" 1826 if self._match_text_seq("PAST", "LAST", "ROW"): 1827 text += f" PAST LAST ROW" 1828 elif self._match_text_seq("TO", "NEXT", "ROW"): 1829 text += f" TO NEXT ROW" 1830 elif self._match_text_seq("TO", "FIRST"): 1831 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1832 elif self._match_text_seq("TO", "LAST"): 1833 text += f" TO LAST {self._advance_any().text}" # type: ignore 1834 after = exp.Var(this=text) 1835 else: 1836 after = None 1837 1838 if self._match_text_seq("PATTERN"): 1839 self._match_l_paren() 1840 1841 if not self._curr: 1842 self.raise_error("Expecting )", self._curr) 1843 1844 paren = 1 1845 start = self._curr 1846 1847 while self._curr and paren > 0: 1848 if self._curr.token_type == TokenType.L_PAREN: 1849 paren += 1 1850 if self._curr.token_type == TokenType.R_PAREN: 1851 paren -= 1 1852 end = self._prev 1853 self._advance() 1854 if paren > 
0: 1855 self.raise_error("Expecting )", self._curr) 1856 pattern = exp.Var(this=self._find_sql(start, end)) 1857 else: 1858 pattern = None 1859 1860 define = ( 1861 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1862 ) 1863 self._match_r_paren() 1864 1865 return self.expression( 1866 exp.MatchRecognize, 1867 partition_by=partition, 1868 order=order, 1869 measures=measures, 1870 rows=rows, 1871 after=after, 1872 pattern=pattern, 1873 define=define, 1874 ) 1875 1876 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1877 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1878 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1879 1880 if outer_apply or cross_apply: 1881 this = self._parse_select(table=True) 1882 view = None 1883 outer = not cross_apply 1884 elif self._match(TokenType.LATERAL): 1885 this = self._parse_select(table=True) 1886 view = self._match(TokenType.VIEW) 1887 outer = self._match(TokenType.OUTER) 1888 else: 1889 return None 1890 1891 if not this: 1892 this = self._parse_function() or self._parse_id_var(any_token=False) 1893 while self._match(TokenType.DOT): 1894 this = exp.Dot( 1895 this=this, 1896 expression=self._parse_function() or self._parse_id_var(any_token=False), 1897 ) 1898 1899 table_alias: t.Optional[exp.Expression] 1900 1901 if view: 1902 table = self._parse_id_var(any_token=False) 1903 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1904 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1905 else: 1906 table_alias = self._parse_table_alias() 1907 1908 expression = self.expression( 1909 exp.Lateral, 1910 this=this, 1911 view=view, 1912 outer=outer, 1913 alias=table_alias, 1914 ) 1915 1916 if outer_apply or cross_apply: 1917 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1918 1919 return expression 1920 1921 def _parse_join_side_and_kind( 1922 self, 1923 ) -> 
t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1924 return ( 1925 self._match(TokenType.NATURAL) and self._prev, 1926 self._match_set(self.JOIN_SIDES) and self._prev, 1927 self._match_set(self.JOIN_KINDS) and self._prev, 1928 ) 1929 1930 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1931 natural, side, kind = self._parse_join_side_and_kind() 1932 1933 if not skip_join_token and not self._match(TokenType.JOIN): 1934 return None 1935 1936 kwargs: t.Dict[ 1937 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1938 ] = {"this": self._parse_table()} 1939 1940 if natural: 1941 kwargs["natural"] = True 1942 if side: 1943 kwargs["side"] = side.text 1944 if kind: 1945 kwargs["kind"] = kind.text 1946 1947 if self._match(TokenType.ON): 1948 kwargs["on"] = self._parse_conjunction() 1949 elif self._match(TokenType.USING): 1950 kwargs["using"] = self._parse_wrapped_id_vars() 1951 1952 return self.expression(exp.Join, **kwargs) # type: ignore 1953 1954 def _parse_index(self) -> exp.Expression: 1955 index = self._parse_id_var() 1956 self._match(TokenType.ON) 1957 self._match(TokenType.TABLE) # hive 1958 1959 return self.expression( 1960 exp.Index, 1961 this=index, 1962 table=self.expression(exp.Table, this=self._parse_id_var()), 1963 columns=self._parse_expression(), 1964 ) 1965 1966 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 1967 unique = self._match(TokenType.UNIQUE) 1968 primary = self._match_text_seq("PRIMARY") 1969 amp = self._match_text_seq("AMP") 1970 if not self._match(TokenType.INDEX): 1971 return None 1972 index = self._parse_id_var() 1973 columns = None 1974 if self._match(TokenType.L_PAREN, advance=False): 1975 columns = self._parse_wrapped_csv(self._parse_column) 1976 return self.expression( 1977 exp.Index, 1978 this=index, 1979 columns=columns, 1980 unique=unique, 1981 primary=primary, 1982 amp=amp, 1983 ) 1984 1985 def _parse_table_parts(self, schema: 
bool = False) -> exp.Expression: 1986 catalog = None 1987 db = None 1988 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 1989 1990 while self._match(TokenType.DOT): 1991 if catalog: 1992 # This allows nesting the table in arbitrarily many dot expressions if needed 1993 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 1994 else: 1995 catalog = db 1996 db = table 1997 table = self._parse_id_var() 1998 1999 if not table: 2000 self.raise_error(f"Expected table name but got {self._curr}") 2001 2002 return self.expression( 2003 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2004 ) 2005 2006 def _parse_table( 2007 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2008 ) -> t.Optional[exp.Expression]: 2009 lateral = self._parse_lateral() 2010 2011 if lateral: 2012 return lateral 2013 2014 unnest = self._parse_unnest() 2015 2016 if unnest: 2017 return unnest 2018 2019 values = self._parse_derived_table_values() 2020 2021 if values: 2022 return values 2023 2024 subquery = self._parse_select(table=True) 2025 2026 if subquery: 2027 return subquery 2028 2029 this = self._parse_table_parts(schema=schema) 2030 2031 if schema: 2032 return self._parse_schema(this=this) 2033 2034 if self.alias_post_tablesample: 2035 table_sample = self._parse_table_sample() 2036 2037 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2038 2039 if alias: 2040 this.set("alias", alias) 2041 2042 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2043 this.set( 2044 "hints", 2045 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2046 ) 2047 self._match_r_paren() 2048 2049 if not self.alias_post_tablesample: 2050 table_sample = self._parse_table_sample() 2051 2052 if table_sample: 2053 table_sample.set("this", this) 2054 this = table_sample 2055 2056 return this 2057 2058 def _parse_unnest(self) -> 
t.Optional[exp.Expression]: 2059 if not self._match(TokenType.UNNEST): 2060 return None 2061 2062 expressions = self._parse_wrapped_csv(self._parse_column) 2063 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2064 alias = self._parse_table_alias() 2065 2066 if alias and self.unnest_column_only: 2067 if alias.args.get("columns"): 2068 self.raise_error("Unexpected extra column alias in unnest.") 2069 alias.set("columns", [alias.this]) 2070 alias.set("this", None) 2071 2072 offset = None 2073 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2074 self._match(TokenType.ALIAS) 2075 offset = self._parse_conjunction() 2076 2077 return self.expression( 2078 exp.Unnest, 2079 expressions=expressions, 2080 ordinality=ordinality, 2081 alias=alias, 2082 offset=offset, 2083 ) 2084 2085 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2086 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2087 if not is_derived and not self._match(TokenType.VALUES): 2088 return None 2089 2090 expressions = self._parse_csv(self._parse_value) 2091 2092 if is_derived: 2093 self._match_r_paren() 2094 2095 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2096 2097 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2098 if not self._match(TokenType.TABLE_SAMPLE): 2099 return None 2100 2101 method = self._parse_var() 2102 bucket_numerator = None 2103 bucket_denominator = None 2104 bucket_field = None 2105 percent = None 2106 rows = None 2107 size = None 2108 seed = None 2109 2110 self._match_l_paren() 2111 2112 if self._match(TokenType.BUCKET): 2113 bucket_numerator = self._parse_number() 2114 self._match(TokenType.OUT_OF) 2115 bucket_denominator = bucket_denominator = self._parse_number() 2116 self._match(TokenType.ON) 2117 bucket_field = self._parse_field() 2118 else: 2119 num = self._parse_number() 2120 2121 if self._match(TokenType.PERCENT): 2122 percent = num 2123 
            elif self._match(TokenType.ROWS):
                rows = num
            else:
                size = num

        self._match_r_paren()

        if self._match(TokenType.SEED):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one `PIVOT|UNPIVOT (<exprs> FOR <col> IN (...))` clause; None otherwise."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # Without the opening paren this wasn't a pivot after all — backtrack.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; None when WHERE is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY with expressions, GROUPING SETS, ROLLUP and CUBE."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep consuming grouping elements until an iteration yields nothing new.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # `WITH ROLLUP` / `WITH CUBE` have no column list of their own.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (<set>, ...)`; None when absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause; None when HAVING is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause; None when QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-like clause introduced by `token_type` (e.g. SORT BY, CLUSTER BY)."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item with ASC/DESC and NULLS FIRST/LAST.

        When null ordering isn't explicit, nulls_first is derived from the
        dialect's null_ordering setting so generated SQL stays equivalent.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when top=True) or a FETCH clause; returns `this` when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP (n) may be parenthesized.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW,
TokenType.ROWS)) 2306 self._match(TokenType.ONLY) 2307 return self.expression(exp.Fetch, direction=direction, count=count) 2308 2309 return this 2310 2311 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2312 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2313 return this 2314 2315 count = self._parse_number() 2316 self._match_set((TokenType.ROW, TokenType.ROWS)) 2317 return self.expression(exp.Offset, this=this, expression=count) 2318 2319 def _parse_lock(self) -> t.Optional[exp.Expression]: 2320 if self._match_text_seq("FOR", "UPDATE"): 2321 return self.expression(exp.Lock, update=True) 2322 if self._match_text_seq("FOR", "SHARE"): 2323 return self.expression(exp.Lock, update=False) 2324 2325 return None 2326 2327 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2328 if not self._match_set(self.SET_OPERATIONS): 2329 return this 2330 2331 token_type = self._prev.token_type 2332 2333 if token_type == TokenType.UNION: 2334 expression = exp.Union 2335 elif token_type == TokenType.EXCEPT: 2336 expression = exp.Except 2337 else: 2338 expression = exp.Intersect 2339 2340 return self.expression( 2341 expression, 2342 this=this, 2343 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2344 expression=self._parse_set_operations(self._parse_select(nested=True)), 2345 ) 2346 2347 def _parse_expression(self) -> t.Optional[exp.Expression]: 2348 return self._parse_alias(self._parse_conjunction()) 2349 2350 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2351 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2352 2353 def _parse_equality(self) -> t.Optional[exp.Expression]: 2354 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2355 2356 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2357 return self._parse_tokens(self._parse_range, self.COMPARISON) 2358 2359 def _parse_range(self) -> 
t.Optional[exp.Expression]: 2360 this = self._parse_bitwise() 2361 negate = self._match(TokenType.NOT) 2362 2363 if self._match_set(self.RANGE_PARSERS): 2364 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2365 elif self._match(TokenType.ISNULL): 2366 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2367 2368 # Postgres supports ISNULL and NOTNULL for conditions. 2369 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2370 if self._match(TokenType.NOTNULL): 2371 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2372 this = self.expression(exp.Not, this=this) 2373 2374 if negate: 2375 this = self.expression(exp.Not, this=this) 2376 2377 if self._match(TokenType.IS): 2378 this = self._parse_is(this) 2379 2380 return this 2381 2382 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2383 negate = self._match(TokenType.NOT) 2384 if self._match(TokenType.DISTINCT_FROM): 2385 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2386 return self.expression(klass, this=this, expression=self._parse_expression()) 2387 2388 this = self.expression( 2389 exp.Is, 2390 this=this, 2391 expression=self._parse_null() or self._parse_boolean(), 2392 ) 2393 return self.expression(exp.Not, this=this) if negate else this 2394 2395 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2396 unnest = self._parse_unnest() 2397 if unnest: 2398 this = self.expression(exp.In, this=this, unnest=unnest) 2399 elif self._match(TokenType.L_PAREN): 2400 expressions = self._parse_csv(self._parse_select_or_expression) 2401 2402 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2403 this = self.expression(exp.In, this=this, query=expressions[0]) 2404 else: 2405 this = self.expression(exp.In, this=this, expressions=expressions) 2406 2407 self._match_r_paren() 2408 else: 2409 this = self.expression(exp.In, this=this, field=self._parse_field()) 2410 2411 return this 2412 2413 def 
_parse_between(self, this: exp.Expression) -> exp.Expression: 2414 low = self._parse_bitwise() 2415 self._match(TokenType.AND) 2416 high = self._parse_bitwise() 2417 return self.expression(exp.Between, this=this, low=low, high=high) 2418 2419 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2420 if not self._match(TokenType.ESCAPE): 2421 return this 2422 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2423 2424 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2425 this = self._parse_term() 2426 2427 while True: 2428 if self._match_set(self.BITWISE): 2429 this = self.expression( 2430 self.BITWISE[self._prev.token_type], 2431 this=this, 2432 expression=self._parse_term(), 2433 ) 2434 elif self._match_pair(TokenType.LT, TokenType.LT): 2435 this = self.expression( 2436 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2437 ) 2438 elif self._match_pair(TokenType.GT, TokenType.GT): 2439 this = self.expression( 2440 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2441 ) 2442 else: 2443 break 2444 2445 return this 2446 2447 def _parse_term(self) -> t.Optional[exp.Expression]: 2448 return self._parse_tokens(self._parse_factor, self.TERM) 2449 2450 def _parse_factor(self) -> t.Optional[exp.Expression]: 2451 return self._parse_tokens(self._parse_unary, self.FACTOR) 2452 2453 def _parse_unary(self) -> t.Optional[exp.Expression]: 2454 if self._match_set(self.UNARY_PARSERS): 2455 return self.UNARY_PARSERS[self._prev.token_type](self) 2456 return self._parse_at_time_zone(self._parse_type()) 2457 2458 def _parse_type(self) -> t.Optional[exp.Expression]: 2459 if self._match(TokenType.INTERVAL): 2460 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2461 2462 index = self._index 2463 type_token = self._parse_types(check_func=True) 2464 this = self._parse_column() 2465 2466 if type_token: 2467 if this and not isinstance(this, exp.Star): 2468 
return self.expression(exp.Cast, this=this, to=type_token) 2469 if not type_token.args.get("expressions"): 2470 self._retreat(index) 2471 return self._parse_column() 2472 return type_token 2473 2474 return this 2475 2476 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2477 index = self._index 2478 2479 prefix = self._match_text_seq("SYSUDTLIB", ".") 2480 2481 if not self._match_set(self.TYPE_TOKENS): 2482 return None 2483 2484 type_token = self._prev.token_type 2485 2486 if type_token == TokenType.PSEUDO_TYPE: 2487 return self.expression(exp.PseudoType, this=self._prev.text) 2488 2489 nested = type_token in self.NESTED_TYPE_TOKENS 2490 is_struct = type_token == TokenType.STRUCT 2491 expressions = None 2492 maybe_func = False 2493 2494 if self._match(TokenType.L_PAREN): 2495 if is_struct: 2496 expressions = self._parse_csv(self._parse_struct_kwargs) 2497 elif nested: 2498 expressions = self._parse_csv(self._parse_types) 2499 else: 2500 expressions = self._parse_csv(self._parse_conjunction) 2501 2502 if not expressions: 2503 self._retreat(index) 2504 return None 2505 2506 self._match_r_paren() 2507 maybe_func = True 2508 2509 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2510 this = exp.DataType( 2511 this=exp.DataType.Type.ARRAY, 2512 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2513 nested=True, 2514 ) 2515 2516 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2517 this = exp.DataType( 2518 this=exp.DataType.Type.ARRAY, 2519 expressions=[this], 2520 nested=True, 2521 ) 2522 2523 return this 2524 2525 if self._match(TokenType.L_BRACKET): 2526 self._retreat(index) 2527 return None 2528 2529 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2530 if nested and self._match(TokenType.LT): 2531 if is_struct: 2532 expressions = self._parse_csv(self._parse_struct_kwargs) 2533 else: 2534 expressions = self._parse_csv(self._parse_types) 2535 2536 if not 
self._match(TokenType.GT): 2537 self.raise_error("Expecting >") 2538 2539 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2540 values = self._parse_csv(self._parse_conjunction) 2541 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2542 2543 value: t.Optional[exp.Expression] = None 2544 if type_token in self.TIMESTAMPS: 2545 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2546 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2547 elif ( 2548 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2549 ): 2550 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2551 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2552 if type_token == TokenType.TIME: 2553 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2554 else: 2555 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2556 2557 maybe_func = maybe_func and value is None 2558 2559 if value is None: 2560 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2561 elif type_token == TokenType.INTERVAL: 2562 value = self.expression(exp.Interval, unit=self._parse_var()) 2563 2564 if maybe_func and check_func: 2565 index2 = self._index 2566 peek = self._parse_string() 2567 2568 if not peek: 2569 self._retreat(index) 2570 return None 2571 2572 self._retreat(index2) 2573 2574 if value: 2575 return value 2576 2577 return exp.DataType( 2578 this=exp.DataType.Type[type_token.value.upper()], 2579 expressions=expressions, 2580 nested=nested, 2581 values=values, 2582 prefix=prefix, 2583 ) 2584 2585 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2586 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2587 return self._parse_types() 2588 2589 this = self._parse_id_var() 2590 self._match(TokenType.COLON) 2591 data_type = self._parse_types() 2592 2593 if not data_type: 2594 return None 
2595 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2596 2597 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2598 if not self._match(TokenType.AT_TIME_ZONE): 2599 return this 2600 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2601 2602 def _parse_column(self) -> t.Optional[exp.Expression]: 2603 this = self._parse_field() 2604 if isinstance(this, exp.Identifier): 2605 this = self.expression(exp.Column, this=this) 2606 elif not this: 2607 return self._parse_bracket(this) 2608 this = self._parse_bracket(this) 2609 2610 while self._match_set(self.COLUMN_OPERATORS): 2611 op_token = self._prev.token_type 2612 op = self.COLUMN_OPERATORS.get(op_token) 2613 2614 if op_token == TokenType.DCOLON: 2615 field = self._parse_types() 2616 if not field: 2617 self.raise_error("Expected type") 2618 elif op: 2619 self._advance() 2620 value = self._prev.text 2621 field = ( 2622 exp.Literal.number(value) 2623 if self._prev.token_type == TokenType.NUMBER 2624 else exp.Literal.string(value) 2625 ) 2626 else: 2627 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2628 2629 if isinstance(field, exp.Func): 2630 # bigquery allows function calls like x.y.count(...) 2631 # SAFE.SUBSTR(...) 
2632 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2633 this = self._replace_columns_with_dots(this) 2634 2635 if op: 2636 this = op(self, this, field) 2637 elif isinstance(this, exp.Column) and not this.args.get("schema"): 2638 this = self.expression( 2639 exp.Column, this=field, table=this.this, schema=this.args.get("table") 2640 ) 2641 else: 2642 this = self.expression(exp.Dot, this=this, expression=field) 2643 this = self._parse_bracket(this) 2644 2645 return this 2646 2647 def _parse_primary(self) -> t.Optional[exp.Expression]: 2648 if self._match_set(self.PRIMARY_PARSERS): 2649 token_type = self._prev.token_type 2650 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2651 2652 if token_type == TokenType.STRING: 2653 expressions = [primary] 2654 while self._match(TokenType.STRING): 2655 expressions.append(exp.Literal.string(self._prev.text)) 2656 if len(expressions) > 1: 2657 return self.expression(exp.Concat, expressions=expressions) 2658 return primary 2659 2660 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2661 return exp.Literal.number(f"0.{self._prev.text}") 2662 2663 if self._match(TokenType.L_PAREN): 2664 comments = self._prev_comments 2665 query = self._parse_select() 2666 2667 if query: 2668 expressions = [query] 2669 else: 2670 expressions = self._parse_csv( 2671 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2672 ) 2673 2674 this = seq_get(expressions, 0) 2675 self._parse_query_modifiers(this) 2676 self._match_r_paren() 2677 2678 if isinstance(this, exp.Subqueryable): 2679 this = self._parse_set_operations( 2680 self._parse_subquery(this=this, parse_alias=False) 2681 ) 2682 elif len(expressions) > 1: 2683 this = self.expression(exp.Tuple, expressions=expressions) 2684 else: 2685 this = self.expression(exp.Paren, this=this) 2686 2687 if this and comments: 2688 this.comments = comments 2689 2690 return this 2691 2692 return None 2693 2694 def 
_parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2695 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2696 2697 def _parse_function( 2698 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2699 ) -> t.Optional[exp.Expression]: 2700 if not self._curr: 2701 return None 2702 2703 token_type = self._curr.token_type 2704 2705 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2706 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2707 2708 if not self._next or self._next.token_type != TokenType.L_PAREN: 2709 if token_type in self.NO_PAREN_FUNCTIONS: 2710 self._advance() 2711 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2712 2713 return None 2714 2715 if token_type not in self.FUNC_TOKENS: 2716 return None 2717 2718 this = self._curr.text 2719 upper = this.upper() 2720 self._advance(2) 2721 2722 parser = self.FUNCTION_PARSERS.get(upper) 2723 2724 if parser: 2725 this = parser(self) 2726 else: 2727 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2728 2729 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2730 this = self.expression(subquery_predicate, this=self._parse_select()) 2731 self._match_r_paren() 2732 return this 2733 2734 if functions is None: 2735 functions = self.FUNCTIONS 2736 2737 function = functions.get(upper) 2738 args = self._parse_csv(self._parse_lambda) 2739 2740 if function: 2741 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2742 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
2743 if count_params(function) == 2: 2744 params = None 2745 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2746 params = self._parse_csv(self._parse_lambda) 2747 2748 this = function(args, params) 2749 else: 2750 this = function(args) 2751 2752 self.validate_expression(this, args) 2753 else: 2754 this = self.expression(exp.Anonymous, this=this, expressions=args) 2755 2756 self._match_r_paren(this) 2757 return self._parse_window(this) 2758 2759 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2760 return self._parse_column_def(self._parse_id_var()) 2761 2762 def _parse_user_defined_function( 2763 self, kind: t.Optional[TokenType] = None 2764 ) -> t.Optional[exp.Expression]: 2765 this = self._parse_id_var() 2766 2767 while self._match(TokenType.DOT): 2768 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2769 2770 if not self._match(TokenType.L_PAREN): 2771 return this 2772 2773 expressions = self._parse_csv(self._parse_function_parameter) 2774 self._match_r_paren() 2775 return self.expression( 2776 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2777 ) 2778 2779 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2780 literal = self._parse_primary() 2781 if literal: 2782 return self.expression(exp.Introducer, this=token.text, expression=literal) 2783 2784 return self.expression(exp.Identifier, this=token.text) 2785 2786 def _parse_national(self, token: Token) -> exp.Expression: 2787 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2788 2789 def _parse_session_parameter(self) -> exp.Expression: 2790 kind = None 2791 this = self._parse_id_var() or self._parse_primary() 2792 2793 if this and self._match(TokenType.DOT): 2794 kind = this.name 2795 this = self._parse_var() or self._parse_primary() 2796 2797 return self.expression(exp.SessionParameter, this=this, kind=kind) 2798 2799 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2800 
index = self._index 2801 2802 if self._match(TokenType.L_PAREN): 2803 expressions = self._parse_csv(self._parse_id_var) 2804 2805 if not self._match(TokenType.R_PAREN): 2806 self._retreat(index) 2807 else: 2808 expressions = [self._parse_id_var()] 2809 2810 if self._match_set(self.LAMBDAS): 2811 return self.LAMBDAS[self._prev.token_type](self, expressions) 2812 2813 self._retreat(index) 2814 2815 this: t.Optional[exp.Expression] 2816 2817 if self._match(TokenType.DISTINCT): 2818 this = self.expression( 2819 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2820 ) 2821 else: 2822 this = self._parse_select_or_expression() 2823 2824 if self._match(TokenType.IGNORE_NULLS): 2825 this = self.expression(exp.IgnoreNulls, this=this) 2826 else: 2827 self._match(TokenType.RESPECT_NULLS) 2828 2829 return self._parse_limit(self._parse_order(this)) 2830 2831 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2832 index = self._index 2833 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2834 self._retreat(index) 2835 return this 2836 2837 args = self._parse_csv( 2838 lambda: self._parse_constraint() 2839 or self._parse_column_def(self._parse_field(any_token=True)) 2840 ) 2841 self._match_r_paren() 2842 return self.expression(exp.Schema, this=this, expressions=args) 2843 2844 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2845 kind = self._parse_types() 2846 2847 if self._match_text_seq("FOR", "ORDINALITY"): 2848 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2849 2850 constraints = [] 2851 while True: 2852 constraint = self._parse_column_constraint() 2853 if not constraint: 2854 break 2855 constraints.append(constraint) 2856 2857 if not kind and not constraints: 2858 return this 2859 2860 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2861 2862 def _parse_auto_increment(self) -> exp.Expression: 
2863 start = None 2864 increment = None 2865 2866 if self._match(TokenType.L_PAREN, advance=False): 2867 args = self._parse_wrapped_csv(self._parse_bitwise) 2868 start = seq_get(args, 0) 2869 increment = seq_get(args, 1) 2870 elif self._match_text_seq("START"): 2871 start = self._parse_bitwise() 2872 self._match_text_seq("INCREMENT") 2873 increment = self._parse_bitwise() 2874 2875 if start and increment: 2876 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2877 2878 return exp.AutoIncrementColumnConstraint() 2879 2880 def _parse_generated_as_identity(self) -> exp.Expression: 2881 if self._match(TokenType.BY_DEFAULT): 2882 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2883 else: 2884 self._match_text_seq("ALWAYS") 2885 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2886 2887 self._match_text_seq("AS", "IDENTITY") 2888 if self._match(TokenType.L_PAREN): 2889 if self._match_text_seq("START", "WITH"): 2890 this.set("start", self._parse_bitwise()) 2891 if self._match_text_seq("INCREMENT", "BY"): 2892 this.set("increment", self._parse_bitwise()) 2893 if self._match_text_seq("MINVALUE"): 2894 this.set("minvalue", self._parse_bitwise()) 2895 if self._match_text_seq("MAXVALUE"): 2896 this.set("maxvalue", self._parse_bitwise()) 2897 2898 if self._match_text_seq("CYCLE"): 2899 this.set("cycle", True) 2900 elif self._match_text_seq("NO", "CYCLE"): 2901 this.set("cycle", False) 2902 2903 self._match_r_paren() 2904 2905 return this 2906 2907 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2908 if self._match_text_seq("NULL"): 2909 return self.expression(exp.NotNullColumnConstraint) 2910 if self._match_text_seq("CASESPECIFIC"): 2911 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2912 return None 2913 2914 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2915 this = self._parse_references() 2916 if this: 2917 return this 2918 2919 if 
self._match(TokenType.CONSTRAINT): 2920 this = self._parse_id_var() 2921 2922 if self._match_texts(self.CONSTRAINT_PARSERS): 2923 return self.expression( 2924 exp.ColumnConstraint, 2925 this=this, 2926 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 2927 ) 2928 2929 return this 2930 2931 def _parse_constraint(self) -> t.Optional[exp.Expression]: 2932 if not self._match(TokenType.CONSTRAINT): 2933 return self._parse_unnamed_constraint() 2934 2935 this = self._parse_id_var() 2936 expressions = [] 2937 2938 while True: 2939 constraint = self._parse_unnamed_constraint() or self._parse_function() 2940 if not constraint: 2941 break 2942 expressions.append(constraint) 2943 2944 return self.expression(exp.Constraint, this=this, expressions=expressions) 2945 2946 def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]: 2947 if not self._match_texts(self.CONSTRAINT_PARSERS): 2948 return None 2949 return self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 2950 2951 def _parse_unique(self) -> exp.Expression: 2952 if not self._match(TokenType.L_PAREN, advance=False): 2953 return self.expression(exp.UniqueColumnConstraint) 2954 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 2955 2956 def _parse_key_constraint_options(self) -> t.List[str]: 2957 options = [] 2958 while True: 2959 if not self._curr: 2960 break 2961 2962 if self._match(TokenType.ON): 2963 action = None 2964 on = self._advance_any() and self._prev.text 2965 2966 if self._match(TokenType.NO_ACTION): 2967 action = "NO ACTION" 2968 elif self._match(TokenType.CASCADE): 2969 action = "CASCADE" 2970 elif self._match_pair(TokenType.SET, TokenType.NULL): 2971 action = "SET NULL" 2972 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 2973 action = "SET DEFAULT" 2974 else: 2975 self.raise_error("Invalid key constraint") 2976 2977 options.append(f"ON {on} {action}") 2978 elif self._match_text_seq("NOT", "ENFORCED"): 2979 options.append("NOT ENFORCED") 2980 elif 
self._match_text_seq("DEFERRABLE"): 2981 options.append("DEFERRABLE") 2982 elif self._match_text_seq("INITIALLY", "DEFERRED"): 2983 options.append("INITIALLY DEFERRED") 2984 elif self._match_text_seq("NORELY"): 2985 options.append("NORELY") 2986 elif self._match_text_seq("MATCH", "FULL"): 2987 options.append("MATCH FULL") 2988 else: 2989 break 2990 2991 return options 2992 2993 def _parse_references(self) -> t.Optional[exp.Expression]: 2994 if not self._match(TokenType.REFERENCES): 2995 return None 2996 2997 expressions = None 2998 this = self._parse_id_var() 2999 3000 if self._match(TokenType.L_PAREN, advance=False): 3001 expressions = self._parse_wrapped_id_vars() 3002 3003 options = self._parse_key_constraint_options() 3004 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3005 3006 def _parse_foreign_key(self) -> exp.Expression: 3007 expressions = self._parse_wrapped_id_vars() 3008 reference = self._parse_references() 3009 options = {} 3010 3011 while self._match(TokenType.ON): 3012 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3013 self.raise_error("Expected DELETE or UPDATE") 3014 3015 kind = self._prev.text.lower() 3016 3017 if self._match(TokenType.NO_ACTION): 3018 action = "NO ACTION" 3019 elif self._match(TokenType.SET): 3020 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3021 action = "SET " + self._prev.text.upper() 3022 else: 3023 self._advance() 3024 action = self._prev.text.upper() 3025 3026 options[kind] = action 3027 3028 return self.expression( 3029 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3030 ) 3031 3032 def _parse_primary_key(self) -> exp.Expression: 3033 desc = ( 3034 self._match_set((TokenType.ASC, TokenType.DESC)) 3035 and self._prev.token_type == TokenType.DESC 3036 ) 3037 3038 if not self._match(TokenType.L_PAREN, advance=False): 3039 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3040 3041 expressions = 
self._parse_wrapped_id_vars() 3042 options = self._parse_key_constraint_options() 3043 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3044 3045 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3046 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3047 return this 3048 3049 bracket_kind = self._prev.token_type 3050 expressions: t.List[t.Optional[exp.Expression]] 3051 3052 if self._match(TokenType.COLON): 3053 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3054 else: 3055 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3056 3057 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3058 if bracket_kind == TokenType.L_BRACE: 3059 this = self.expression(exp.Struct, expressions=expressions) 3060 elif not this or this.name.upper() == "ARRAY": 3061 this = self.expression(exp.Array, expressions=expressions) 3062 else: 3063 expressions = apply_index_offset(expressions, -self.index_offset) 3064 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3065 3066 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3067 self.raise_error("Expected ]") 3068 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3069 self.raise_error("Expected }") 3070 3071 this.comments = self._prev_comments 3072 return self._parse_bracket(this) 3073 3074 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3075 if self._match(TokenType.COLON): 3076 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3077 return this 3078 3079 def _parse_case(self) -> t.Optional[exp.Expression]: 3080 ifs = [] 3081 default = None 3082 3083 expression = self._parse_conjunction() 3084 3085 while self._match(TokenType.WHEN): 3086 this = self._parse_conjunction() 3087 self._match(TokenType.THEN) 3088 then = 
self._parse_conjunction() 3089 ifs.append(self.expression(exp.If, this=this, true=then)) 3090 3091 if self._match(TokenType.ELSE): 3092 default = self._parse_conjunction() 3093 3094 if not self._match(TokenType.END): 3095 self.raise_error("Expected END after CASE", self._prev) 3096 3097 return self._parse_window( 3098 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3099 ) 3100 3101 def _parse_if(self) -> t.Optional[exp.Expression]: 3102 if self._match(TokenType.L_PAREN): 3103 args = self._parse_csv(self._parse_conjunction) 3104 this = exp.If.from_arg_list(args) 3105 self.validate_expression(this, args) 3106 self._match_r_paren() 3107 else: 3108 condition = self._parse_conjunction() 3109 self._match(TokenType.THEN) 3110 true = self._parse_conjunction() 3111 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3112 self._match(TokenType.END) 3113 this = self.expression(exp.If, this=condition, true=true, false=false) 3114 3115 return self._parse_window(this) 3116 3117 def _parse_extract(self) -> exp.Expression: 3118 this = self._parse_function() or self._parse_var() or self._parse_type() 3119 3120 if self._match(TokenType.FROM): 3121 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3122 3123 if not self._match(TokenType.COMMA): 3124 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3125 3126 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3127 3128 def _parse_cast(self, strict: bool) -> exp.Expression: 3129 this = self._parse_conjunction() 3130 3131 if not self._match(TokenType.ALIAS): 3132 self.raise_error("Expected AS after CAST") 3133 3134 to = self._parse_types() 3135 3136 if not to: 3137 self.raise_error("Expected TYPE after CAST") 3138 elif to.this == exp.DataType.Type.CHAR: 3139 if self._match(TokenType.CHARACTER_SET): 3140 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3141 3142 return self.expression(exp.Cast 
if strict else exp.TryCast, this=this, to=to) 3143 3144 def _parse_string_agg(self) -> exp.Expression: 3145 expression: t.Optional[exp.Expression] 3146 3147 if self._match(TokenType.DISTINCT): 3148 args = self._parse_csv(self._parse_conjunction) 3149 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3150 else: 3151 args = self._parse_csv(self._parse_conjunction) 3152 expression = seq_get(args, 0) 3153 3154 index = self._index 3155 if not self._match(TokenType.R_PAREN): 3156 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3157 order = self._parse_order(this=expression) 3158 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3159 3160 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3161 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3162 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
        # NOTE(review): the lines below are the tail of _parse_group_concat;
        # its `def` line sits above this chunk. Code unchanged.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `<expr> USING <charset>` or `<expr>, <type>`."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        # `strict` selects Cast vs TryCast semantics.
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated argument form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # In the comma form, the argument order is dialect dependent.
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table list of a join-hint function into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING arguments, including the FROM ... FOR ... form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] expr [COLLATE collation])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-term form: the first term is the characters to trim.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / {IGNORE|RESPECT} NULLS / OVER (...) after `this`."""
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (`UNBOUNDED|CURRENT ROW|<expr>` plus PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or column alias list) after `this`.

        When `explicit` is True an AS keyword is required for an alias to be parsed.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier, or return None."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or, with any_token, any non-reserved token) into a Var."""
        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, or a string literal when no variable is present."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if the registered parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (column, ...) modifier, with or without parentheses."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) modifier, with or without parentheses."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trail the separator to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a required '(' and ')'."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or a plain expression when none is present."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (with set operations) that feeds a DDL statement."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode can be several VAR tokens, e.g. ISOLATION LEVEL ...
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # Note: `chain` is only attached to COMMIT; ROLLBACK keeps the savepoint.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> in ALTER TABLE."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse DROP [COLUMN] ... in ALTER TABLE."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse DROP PARTITION spec(s) in ALTER TABLE."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse ADD [CONSTRAINT name] CHECK/FOREIGN KEY/PRIMARY KEY in ALTER TABLE."""
        this = None
        # The constraint-kind token was consumed by the caller (_parse_alter).
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        # NOTE(review): if neither branch above matches, `expression` is unbound
        # here and this raises UnboundLocalError — confirm intended token coverage.
        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement; other ALTER forms become raw Commands."""
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column re-reads ADD.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition: rewind so _parse_drop_column re-reads DROP.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via registered SHOW parsers, else a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET item parser: wrap whatever statement follows."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via registered parsers, else the default parser."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)

            # NOTE(review): if none of INSERT/UPDATE/DELETE follows THEN, `then`
            # is unbound here — confirm the grammar guarantees one of them.
            whens.append(self.expression(exp.When, this=this, then=then))

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement as a list of SET items."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an opaque Command."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word keyword parser by walking `trie`; rewinds on miss."""
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        """Return True (consuming by default) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types):
        """Return True and consume if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True if the next two tokens match the pair, consuming by default."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
3780 3781 def _match_l_paren(self, expression=None): 3782 if not self._match(TokenType.L_PAREN): 3783 self.raise_error("Expecting (") 3784 if expression and self._prev_comments: 3785 expression.comments = self._prev_comments 3786 3787 def _match_r_paren(self, expression=None): 3788 if not self._match(TokenType.R_PAREN): 3789 self.raise_error("Expecting )") 3790 if expression and self._prev_comments: 3791 expression.comments = self._prev_comments 3792 3793 def _match_texts(self, texts): 3794 if self._curr and self._curr.text.upper() in texts: 3795 self._advance() 3796 return True 3797 return False 3798 3799 def _match_text_seq(self, *texts, advance=True): 3800 index = self._index 3801 for text in texts: 3802 if self._curr and self._curr.text.upper() == text: 3803 self._advance() 3804 else: 3805 self._retreat(index) 3806 return False 3807 3808 if not advance: 3809 self._retreat(index) 3810 3811 return True 3812 3813 def _replace_columns_with_dots(self, this): 3814 if isinstance(this, exp.Dot): 3815 exp.replace_children(this, self._replace_columns_with_dots) 3816 elif isinstance(this, exp.Column): 3817 exp.replace_children(this, self._replace_columns_with_dots) 3818 table = this.args.get("table") 3819 this = ( 3820 self.expression(exp.Dot, this=table, expression=this.this) 3821 if table 3822 else self.expression(exp.Var, this=this.name) 3823 ) 3824 elif isinstance(this, exp.Identifier): 3825 this = self.expression(exp.Var, this=this.name) 3826 return this 3827 3828 def _replace_lambda(self, node, lambda_variables): 3829 if isinstance(node, exp.Column): 3830 if node.name in lambda_variables: 3831 return node.this 3832 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (the `__init__` fallback; note the class docstring elsewhere says RAISE)
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Store parser configuration and reset all per-parse state.

        Args:
            error_level: error handling strategy; falls back to ErrorLevel.IMMEDIATE
                when not given. NOTE(review): the class docstring advertises
                ErrorLevel.RAISE as the default — confirm which is intended.
            error_message_context: number of characters of query context captured
                around an error location when formatting messages.
            index_offset: index of the first array element (e.g. 0 or 1).
            unnest_column_only: dialect flag forwarded to UNNEST handling.
            alias_post_tablesample: whether the table alias comes after TABLESAMPLE.
            max_errors: maximum number of messages included in a raised ParseError.
            null_ordering: default null ordering ("nulls_are_small",
                "nulls_are_large" or "nulls_are_last") when not explicit in SQL.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # reset() initializes the mutable parsing state (tokens, index, errors, ...).
        self.reset()
728 def parse( 729 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 730 ) -> t.List[t.Optional[exp.Expression]]: 731 """ 732 Parses a list of tokens and returns a list of syntax trees, one tree 733 per parsed SQL statement. 734 735 Args: 736 raw_tokens: the list of tokens. 737 sql: the original SQL string, used to produce helpful debug messages. 738 739 Returns: 740 The list of syntax trees. 741 """ 742 return self._parse( 743 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 744 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
746 def parse_into( 747 self, 748 expression_types: exp.IntoType, 749 raw_tokens: t.List[Token], 750 sql: t.Optional[str] = None, 751 ) -> t.List[t.Optional[exp.Expression]]: 752 """ 753 Parses a list of tokens into a given Expression type. If a collection of Expression 754 types is given instead, this method will try to parse the token list into each one 755 of them, stopping at the first for which the parsing succeeds. 756 757 Args: 758 expression_types: the expression type(s) to try and parse the token list into. 759 raw_tokens: the list of tokens. 760 sql: the original SQL string, used to produce helpful debug messages. 761 762 Returns: 763 The target Expression. 764 """ 765 errors = [] 766 for expression_type in ensure_collection(expression_types): 767 parser = self.EXPRESSION_PARSERS.get(expression_type) 768 if not parser: 769 raise TypeError(f"No parser registered for {expression_type}") 770 try: 771 return self._parse(parser, raw_tokens, sql) 772 except ParseError as e: 773 e.errors[0]["into_expression"] = expression_type 774 errors.append(e) 775 raise ParseError( 776 f"Failed to parse into {expression_types}", 777 errors=merge_errors(errors), 778 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
814 def check_errors(self) -> None: 815 """ 816 Logs or raises any found errors, depending on the chosen error level setting. 817 """ 818 if self.error_level == ErrorLevel.WARN: 819 for error in self.errors: 820 logger.error(str(error)) 821 elif self.error_level == ErrorLevel.RAISE and self.errors: 822 raise ParseError( 823 concat_messages(self.errors, self.max_errors), 824 errors=merge_errors(self.errors), 825 )
Logs or raises any found errors, depending on the chosen error level setting.
827 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 828 """ 829 Appends an error in the list of recorded errors or raises it, depending on the chosen 830 error level setting. 831 """ 832 token = token or self._curr or self._prev or Token.string("") 833 start = self._find_token(token) 834 end = start + len(token.text) 835 start_context = self.sql[max(start - self.error_message_context, 0) : start] 836 highlight = self.sql[start:end] 837 end_context = self.sql[end : end + self.error_message_context] 838 839 error = ParseError.new( 840 f"{message}. Line {token.line}, Col: {token.col}.\n" 841 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 842 description=message, 843 line=token.line, 844 col=token.col, 845 start_context=start_context, 846 highlight=highlight, 847 end_context=end_context, 848 ) 849 850 if self.error_level == ErrorLevel.IMMEDIATE: 851 raise error 852 853 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Side effect: pending tokenizer comments are consumed (cleared) here,
        # so only the first expression created after them picks them up.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        # An explicit `comments` argument overrides any consumed pending comments.
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
878 def validate_expression( 879 self, expression: exp.Expression, args: t.Optional[t.List] = None 880 ) -> None: 881 """ 882 Validates an already instantiated expression, making sure that all its mandatory arguments 883 are set. 884 885 Args: 886 expression: the expression to validate. 887 args: an optional list of items that was used to instantiate the expression, if it's a Func. 888 """ 889 if self.error_level == ErrorLevel.IGNORE: 890 return 891 892 for error_message in expression.error_messages(args): 893 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.