sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13if t.TYPE_CHECKING: 14 from sqlglot._typing import E 15 16logger = logging.getLogger("sqlglot") 17 18 19def parse_var_map(args: t.List) -> exp.Expression: 20 if len(args) == 1 and args[0].is_star: 21 return exp.StarMap(this=args[0]) 22 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def parse_like(args: t.List) -> exp.Expression: 35 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 36 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 37 38 39def binary_range_parser( 40 expr_type: t.Type[exp.Expression], 41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 42 return lambda self, this: self._parse_escape( 43 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 44 ) 45 46 47class _Parser(type): 48 def __new__(cls, clsname, bases, attrs): 49 klass = super().__new__(cls, clsname, bases, attrs) 50 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 51 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 52 53 return klass 54 55 56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 
63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS 
= { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.ASC, 211 
TokenType.AUTO_INCREMENT, 212 TokenType.BEGIN, 213 TokenType.CACHE, 214 TokenType.COLLATE, 215 TokenType.COMMAND, 216 TokenType.COMMENT, 217 TokenType.COMMIT, 218 TokenType.CONSTRAINT, 219 TokenType.DEFAULT, 220 TokenType.DELETE, 221 TokenType.DESC, 222 TokenType.DESCRIBE, 223 TokenType.DIV, 224 TokenType.END, 225 TokenType.EXECUTE, 226 TokenType.ESCAPE, 227 TokenType.FALSE, 228 TokenType.FIRST, 229 TokenType.FILTER, 230 TokenType.FORMAT, 231 TokenType.FULL, 232 TokenType.IF, 233 TokenType.IS, 234 TokenType.ISNULL, 235 TokenType.INTERVAL, 236 TokenType.KEEP, 237 TokenType.LEFT, 238 TokenType.LOAD, 239 TokenType.MERGE, 240 TokenType.NATURAL, 241 TokenType.NEXT, 242 TokenType.OFFSET, 243 TokenType.ORDINALITY, 244 TokenType.OVERWRITE, 245 TokenType.PARTITION, 246 TokenType.PERCENT, 247 TokenType.PIVOT, 248 TokenType.PRAGMA, 249 TokenType.RANGE, 250 TokenType.REFERENCES, 251 TokenType.RIGHT, 252 TokenType.ROW, 253 TokenType.ROWS, 254 TokenType.SEMI, 255 TokenType.SET, 256 TokenType.SETTINGS, 257 TokenType.SHOW, 258 TokenType.TEMPORARY, 259 TokenType.TOP, 260 TokenType.TRUE, 261 TokenType.UNIQUE, 262 TokenType.UNPIVOT, 263 TokenType.VOLATILE, 264 TokenType.WINDOW, 265 *CREATABLES, 266 *SUBQUERY_PREDICATES, 267 *TYPE_TOKENS, 268 *NO_PAREN_FUNCTIONS, 269 } 270 271 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 272 273 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 274 TokenType.APPLY, 275 TokenType.FULL, 276 TokenType.LEFT, 277 TokenType.LOCK, 278 TokenType.NATURAL, 279 TokenType.OFFSET, 280 TokenType.RIGHT, 281 TokenType.WINDOW, 282 } 283 284 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 285 286 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 287 288 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 289 290 FUNC_TOKENS = { 291 TokenType.COMMAND, 292 TokenType.CURRENT_DATE, 293 TokenType.CURRENT_DATETIME, 294 TokenType.CURRENT_TIMESTAMP, 295 TokenType.CURRENT_TIME, 296 TokenType.CURRENT_USER, 297 TokenType.FILTER, 298 TokenType.FIRST, 299 
TokenType.FORMAT, 300 TokenType.GLOB, 301 TokenType.IDENTIFIER, 302 TokenType.INDEX, 303 TokenType.ISNULL, 304 TokenType.ILIKE, 305 TokenType.LIKE, 306 TokenType.MERGE, 307 TokenType.OFFSET, 308 TokenType.PRIMARY_KEY, 309 TokenType.RANGE, 310 TokenType.REPLACE, 311 TokenType.ROW, 312 TokenType.UNNEST, 313 TokenType.VAR, 314 TokenType.LEFT, 315 TokenType.RIGHT, 316 TokenType.DATE, 317 TokenType.DATETIME, 318 TokenType.TABLE, 319 TokenType.TIMESTAMP, 320 TokenType.TIMESTAMPTZ, 321 TokenType.WINDOW, 322 *TYPE_TOKENS, 323 *SUBQUERY_PREDICATES, 324 } 325 326 CONJUNCTION = { 327 TokenType.AND: exp.And, 328 TokenType.OR: exp.Or, 329 } 330 331 EQUALITY = { 332 TokenType.EQ: exp.EQ, 333 TokenType.NEQ: exp.NEQ, 334 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 335 } 336 337 COMPARISON = { 338 TokenType.GT: exp.GT, 339 TokenType.GTE: exp.GTE, 340 TokenType.LT: exp.LT, 341 TokenType.LTE: exp.LTE, 342 } 343 344 BITWISE = { 345 TokenType.AMP: exp.BitwiseAnd, 346 TokenType.CARET: exp.BitwiseXor, 347 TokenType.PIPE: exp.BitwiseOr, 348 TokenType.DPIPE: exp.DPipe, 349 } 350 351 TERM = { 352 TokenType.DASH: exp.Sub, 353 TokenType.PLUS: exp.Add, 354 TokenType.MOD: exp.Mod, 355 TokenType.COLLATE: exp.Collate, 356 } 357 358 FACTOR = { 359 TokenType.DIV: exp.IntDiv, 360 TokenType.LR_ARROW: exp.Distance, 361 TokenType.SLASH: exp.Div, 362 TokenType.STAR: exp.Mul, 363 } 364 365 TIMESTAMPS = { 366 TokenType.TIME, 367 TokenType.TIMESTAMP, 368 TokenType.TIMESTAMPTZ, 369 TokenType.TIMESTAMPLTZ, 370 } 371 372 SET_OPERATIONS = { 373 TokenType.UNION, 374 TokenType.INTERSECT, 375 TokenType.EXCEPT, 376 } 377 378 JOIN_SIDES = { 379 TokenType.LEFT, 380 TokenType.RIGHT, 381 TokenType.FULL, 382 } 383 384 JOIN_KINDS = { 385 TokenType.INNER, 386 TokenType.OUTER, 387 TokenType.CROSS, 388 TokenType.SEMI, 389 TokenType.ANTI, 390 } 391 392 LAMBDAS = { 393 TokenType.ARROW: lambda self, expressions: self.expression( 394 exp.Lambda, 395 this=self._replace_lambda( 396 self._parse_conjunction(), 397 {node.name for node 
in expressions}, 398 ), 399 expressions=expressions, 400 ), 401 TokenType.FARROW: lambda self, expressions: self.expression( 402 exp.Kwarg, 403 this=exp.Var(this=expressions[0].name), 404 expression=self._parse_conjunction(), 405 ), 406 } 407 408 COLUMN_OPERATORS = { 409 TokenType.DOT: None, 410 TokenType.DCOLON: lambda self, this, to: self.expression( 411 exp.Cast if self.STRICT_CAST else exp.TryCast, 412 this=this, 413 to=to, 414 ), 415 TokenType.ARROW: lambda self, this, path: self.expression( 416 exp.JSONExtract, 417 this=this, 418 expression=path, 419 ), 420 TokenType.DARROW: lambda self, this, path: self.expression( 421 exp.JSONExtractScalar, 422 this=this, 423 expression=path, 424 ), 425 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 426 exp.JSONBExtract, 427 this=this, 428 expression=path, 429 ), 430 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 431 exp.JSONBExtractScalar, 432 this=this, 433 expression=path, 434 ), 435 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 436 exp.JSONBContains, 437 this=this, 438 expression=key, 439 ), 440 } 441 442 EXPRESSION_PARSERS = { 443 exp.Column: lambda self: self._parse_column(), 444 exp.DataType: lambda self: self._parse_types(), 445 exp.From: lambda self: self._parse_from(), 446 exp.Group: lambda self: self._parse_group(), 447 exp.Identifier: lambda self: self._parse_id_var(), 448 exp.Lateral: lambda self: self._parse_lateral(), 449 exp.Join: lambda self: self._parse_join(), 450 exp.Order: lambda self: self._parse_order(), 451 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 452 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 453 exp.Lambda: lambda self: self._parse_lambda(), 454 exp.Limit: lambda self: self._parse_limit(), 455 exp.Offset: lambda self: self._parse_offset(), 456 exp.TableAlias: lambda self: self._parse_table_alias(), 457 exp.Table: lambda self: self._parse_table_parts(), 458 exp.Condition: lambda self: 
self._parse_conjunction(), 459 exp.Expression: lambda self: self._parse_statement(), 460 exp.Properties: lambda self: self._parse_properties(), 461 exp.Where: lambda self: self._parse_where(), 462 exp.Ordered: lambda self: self._parse_ordered(), 463 exp.Having: lambda self: self._parse_having(), 464 exp.With: lambda self: self._parse_with(), 465 exp.Window: lambda self: self._parse_named_window(), 466 exp.Qualify: lambda self: self._parse_qualify(), 467 exp.Returning: lambda self: self._parse_returning(), 468 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 469 } 470 471 STATEMENT_PARSERS = { 472 TokenType.ALTER: lambda self: self._parse_alter(), 473 TokenType.BEGIN: lambda self: self._parse_transaction(), 474 TokenType.CACHE: lambda self: self._parse_cache(), 475 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 476 TokenType.COMMENT: lambda self: self._parse_comment(), 477 TokenType.CREATE: lambda self: self._parse_create(), 478 TokenType.DELETE: lambda self: self._parse_delete(), 479 TokenType.DESC: lambda self: self._parse_describe(), 480 TokenType.DESCRIBE: lambda self: self._parse_describe(), 481 TokenType.DROP: lambda self: self._parse_drop(), 482 TokenType.END: lambda self: self._parse_commit_or_rollback(), 483 TokenType.INSERT: lambda self: self._parse_insert(), 484 TokenType.LOAD: lambda self: self._parse_load(), 485 TokenType.MERGE: lambda self: self._parse_merge(), 486 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 487 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 488 TokenType.SET: lambda self: self._parse_set(), 489 TokenType.UNCACHE: lambda self: self._parse_uncache(), 490 TokenType.UPDATE: lambda self: self._parse_update(), 491 TokenType.USE: lambda self: self.expression( 492 exp.Use, 493 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 494 and exp.Var(this=self._prev.text), 495 this=self._parse_table(schema=False), 496 ), 497 } 498 499 
UNARY_PARSERS = { 500 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 501 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 502 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 503 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 504 } 505 506 PRIMARY_PARSERS = { 507 TokenType.STRING: lambda self, token: self.expression( 508 exp.Literal, this=token.text, is_string=True 509 ), 510 TokenType.NUMBER: lambda self, token: self.expression( 511 exp.Literal, this=token.text, is_string=False 512 ), 513 TokenType.STAR: lambda self, _: self.expression( 514 exp.Star, 515 **{"except": self._parse_except(), "replace": self._parse_replace()}, 516 ), 517 TokenType.NULL: lambda self, _: self.expression(exp.Null), 518 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 519 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 520 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 521 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 522 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 523 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 524 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 525 exp.National, this=token.text 526 ), 527 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 528 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 529 } 530 531 PLACEHOLDER_PARSERS = { 532 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 533 TokenType.PARAMETER: lambda self: self._parse_parameter(), 534 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 535 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 536 else None, 537 } 538 
539 RANGE_PARSERS = { 540 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 541 TokenType.GLOB: binary_range_parser(exp.Glob), 542 TokenType.ILIKE: binary_range_parser(exp.ILike), 543 TokenType.IN: lambda self, this: self._parse_in(this), 544 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 545 TokenType.IS: lambda self, this: self._parse_is(this), 546 TokenType.LIKE: binary_range_parser(exp.Like), 547 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 548 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 549 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 550 } 551 552 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 553 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 554 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 555 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 556 "CHARACTER SET": lambda self: self._parse_character_set(), 557 "CHECKSUM": lambda self: self._parse_checksum(), 558 "CLUSTER": lambda self: self._parse_cluster(), 559 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 560 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 561 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 562 "DEFINER": lambda self: self._parse_definer(), 563 "DETERMINISTIC": lambda self: self.expression( 564 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 565 ), 566 "DISTKEY": lambda self: self._parse_distkey(), 567 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 568 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 569 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 570 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 571 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 572 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 573 "FREESPACE": lambda self: self._parse_freespace(), 574 "IMMUTABLE": lambda self: self.expression( 575 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 576 ), 577 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 578 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 579 "LIKE": lambda self: self._parse_create_like(), 580 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 581 "LOCK": lambda self: self._parse_locking(), 582 "LOCKING": lambda self: self._parse_locking(), 583 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 584 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 585 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 586 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 587 "NO": lambda self: self._parse_no_property(), 588 "ON": lambda self: self._parse_on_property(), 589 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 590 "PARTITION BY": lambda self: self._parse_partitioned_by(), 591 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 592 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 593 "PRIMARY KEY": lambda self: self._parse_primary_key(), 594 "RETURNS": lambda self: self._parse_returns(), 595 "ROW": lambda self: self._parse_row(), 596 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 597 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 598 "SETTINGS": lambda self: self.expression( 599 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 600 ), 601 "SORTKEY": lambda self: self._parse_sortkey(), 602 "STABLE": lambda self: self.expression( 603 exp.StabilityProperty, this=exp.Literal.string("STABLE") 604 ), 605 "STORED": lambda self: self._parse_stored(), 606 "TBLPROPERTIES": lambda self: 
self._parse_wrapped_csv(self._parse_property), 607 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 608 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 609 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 610 "TTL": lambda self: self._parse_ttl(), 611 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 612 "VOLATILE": lambda self: self._parse_volatile_property(), 613 "WITH": lambda self: self._parse_with_property(), 614 } 615 616 CONSTRAINT_PARSERS = { 617 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 618 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 619 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 620 "CHARACTER SET": lambda self: self.expression( 621 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 622 ), 623 "CHECK": lambda self: self.expression( 624 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 625 ), 626 "COLLATE": lambda self: self.expression( 627 exp.CollateColumnConstraint, this=self._parse_var() 628 ), 629 "COMMENT": lambda self: self.expression( 630 exp.CommentColumnConstraint, this=self._parse_string() 631 ), 632 "COMPRESS": lambda self: self._parse_compress(), 633 "DEFAULT": lambda self: self.expression( 634 exp.DefaultColumnConstraint, this=self._parse_bitwise() 635 ), 636 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 637 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 638 "FORMAT": lambda self: self.expression( 639 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 640 ), 641 "GENERATED": lambda self: self._parse_generated_as_identity(), 642 "IDENTITY": lambda self: self._parse_auto_increment(), 643 "INLINE": lambda self: self._parse_inline(), 644 "LIKE": lambda self: self._parse_create_like(), 645 "NOT": lambda self: self._parse_not_constraint(), 646 "NULL": lambda self: 
self.expression(exp.NotNullColumnConstraint, allow_null=True), 647 "ON": lambda self: self._match(TokenType.UPDATE) 648 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 649 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 650 "PRIMARY KEY": lambda self: self._parse_primary_key(), 651 "REFERENCES": lambda self: self._parse_references(match=False), 652 "TITLE": lambda self: self.expression( 653 exp.TitleColumnConstraint, this=self._parse_var_or_string() 654 ), 655 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 656 "UNIQUE": lambda self: self._parse_unique(), 657 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 658 } 659 660 ALTER_PARSERS = { 661 "ADD": lambda self: self._parse_alter_table_add(), 662 "ALTER": lambda self: self._parse_alter_table_alter(), 663 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 664 "DROP": lambda self: self._parse_alter_table_drop(), 665 "RENAME": lambda self: self._parse_alter_table_rename(), 666 } 667 668 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 669 670 NO_PAREN_FUNCTION_PARSERS = { 671 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 672 TokenType.CASE: lambda self: self._parse_case(), 673 TokenType.IF: lambda self: self._parse_if(), 674 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 675 exp.NextValueFor, 676 this=self._parse_column(), 677 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 678 ), 679 } 680 681 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 682 683 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 684 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 685 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 686 "DECODE": lambda self: self._parse_decode(), 687 "EXTRACT": lambda self: self._parse_extract(), 688 "JSON_OBJECT": lambda 
self: self._parse_json_object(), 689 "LOG": lambda self: self._parse_logarithm(), 690 "MATCH": lambda self: self._parse_match_against(), 691 "OPENJSON": lambda self: self._parse_open_json(), 692 "POSITION": lambda self: self._parse_position(), 693 "SAFE_CAST": lambda self: self._parse_cast(False), 694 "STRING_AGG": lambda self: self._parse_string_agg(), 695 "SUBSTRING": lambda self: self._parse_substring(), 696 "TRIM": lambda self: self._parse_trim(), 697 "TRY_CAST": lambda self: self._parse_cast(False), 698 "TRY_CONVERT": lambda self: self._parse_convert(False), 699 } 700 701 QUERY_MODIFIER_PARSERS = { 702 "joins": lambda self: list(iter(self._parse_join, None)), 703 "laterals": lambda self: list(iter(self._parse_lateral, None)), 704 "match": lambda self: self._parse_match_recognize(), 705 "where": lambda self: self._parse_where(), 706 "group": lambda self: self._parse_group(), 707 "having": lambda self: self._parse_having(), 708 "qualify": lambda self: self._parse_qualify(), 709 "windows": lambda self: self._parse_window_clause(), 710 "order": lambda self: self._parse_order(), 711 "limit": lambda self: self._parse_limit(), 712 "offset": lambda self: self._parse_offset(), 713 "locks": lambda self: self._parse_locks(), 714 "sample": lambda self: self._parse_table_sample(as_modifier=True), 715 } 716 717 SET_PARSERS = { 718 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 719 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 720 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 721 "TRANSACTION": lambda self: self._parse_set_transaction(), 722 } 723 724 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 725 726 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 727 728 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 729 730 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 731 732 TRANSACTION_CHARACTERISTICS = { 733 "ISOLATION LEVEL REPEATABLE READ", 734 "ISOLATION LEVEL READ COMMITTED", 735 
"ISOLATION LEVEL READ UNCOMMITTED", 736 "ISOLATION LEVEL SERIALIZABLE", 737 "READ WRITE", 738 "READ ONLY", 739 } 740 741 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 742 743 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 744 745 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 746 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 747 748 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 749 750 STRICT_CAST = True 751 752 CONVERT_TYPE_FIRST = False 753 754 PREFIXED_PIVOT_COLUMNS = False 755 IDENTIFY_PIVOT_STRINGS = False 756 757 LOG_BASE_FIRST = True 758 LOG_DEFAULTS_TO_LN = False 759 760 __slots__ = ( 761 "error_level", 762 "error_message_context", 763 "sql", 764 "errors", 765 "index_offset", 766 "unnest_column_only", 767 "alias_post_tablesample", 768 "max_errors", 769 "null_ordering", 770 "_tokens", 771 "_index", 772 "_curr", 773 "_next", 774 "_prev", 775 "_prev_comments", 776 "_show_trie", 777 "_set_trie", 778 ) 779 780 def __init__( 781 self, 782 error_level: t.Optional[ErrorLevel] = None, 783 error_message_context: int = 100, 784 index_offset: int = 0, 785 unnest_column_only: bool = False, 786 alias_post_tablesample: bool = False, 787 max_errors: int = 3, 788 null_ordering: t.Optional[str] = None, 789 ): 790 self.error_level = error_level or ErrorLevel.IMMEDIATE 791 self.error_message_context = error_message_context 792 self.index_offset = index_offset 793 self.unnest_column_only = unnest_column_only 794 self.alias_post_tablesample = alias_post_tablesample 795 self.max_errors = max_errors 796 self.null_ordering = null_ordering 797 self.reset() 798 799 def reset(self): 800 self.sql = "" 801 self.errors = [] 802 self._tokens = [] 803 self._index = 0 804 self._curr = None 805 self._next = None 806 self._prev = None 807 self._prev_comments = None 808 809 def parse( 810 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 811 ) -> t.List[t.Optional[exp.Expression]]: 812 """ 813 
Parses a list of tokens and returns a list of syntax trees, one tree 814 per parsed SQL statement. 815 816 Args: 817 raw_tokens: the list of tokens. 818 sql: the original SQL string, used to produce helpful debug messages. 819 820 Returns: 821 The list of syntax trees. 822 """ 823 return self._parse( 824 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 825 ) 826 827 def parse_into( 828 self, 829 expression_types: exp.IntoType, 830 raw_tokens: t.List[Token], 831 sql: t.Optional[str] = None, 832 ) -> t.List[t.Optional[exp.Expression]]: 833 """ 834 Parses a list of tokens into a given Expression type. If a collection of Expression 835 types is given instead, this method will try to parse the token list into each one 836 of them, stopping at the first for which the parsing succeeds. 837 838 Args: 839 expression_types: the expression type(s) to try and parse the token list into. 840 raw_tokens: the list of tokens. 841 sql: the original SQL string, used to produce helpful debug messages. 842 843 Returns: 844 The target Expression. 
845 """ 846 errors = [] 847 for expression_type in ensure_collection(expression_types): 848 parser = self.EXPRESSION_PARSERS.get(expression_type) 849 if not parser: 850 raise TypeError(f"No parser registered for {expression_type}") 851 try: 852 return self._parse(parser, raw_tokens, sql) 853 except ParseError as e: 854 e.errors[0]["into_expression"] = expression_type 855 errors.append(e) 856 raise ParseError( 857 f"Failed to parse into {expression_types}", 858 errors=merge_errors(errors), 859 ) from errors[-1] 860 861 def _parse( 862 self, 863 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 864 raw_tokens: t.List[Token], 865 sql: t.Optional[str] = None, 866 ) -> t.List[t.Optional[exp.Expression]]: 867 self.reset() 868 self.sql = sql or "" 869 total = len(raw_tokens) 870 chunks: t.List[t.List[Token]] = [[]] 871 872 for i, token in enumerate(raw_tokens): 873 if token.token_type == TokenType.SEMICOLON: 874 if i < total - 1: 875 chunks.append([]) 876 else: 877 chunks[-1].append(token) 878 879 expressions = [] 880 881 for tokens in chunks: 882 self._index = -1 883 self._tokens = tokens 884 self._advance() 885 886 expressions.append(parse_method(self)) 887 888 if self._index < len(self._tokens): 889 self.raise_error("Invalid expression / Unexpected token") 890 891 self.check_errors() 892 893 return expressions 894 895 def check_errors(self) -> None: 896 """ 897 Logs or raises any found errors, depending on the chosen error level setting. 898 """ 899 if self.error_level == ErrorLevel.WARN: 900 for error in self.errors: 901 logger.error(str(error)) 902 elif self.error_level == ErrorLevel.RAISE and self.errors: 903 raise ParseError( 904 concat_messages(self.errors, self.max_errors), 905 errors=merge_errors(self.errors), 906 ) 907 908 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 909 """ 910 Appends an error in the list of recorded errors or raises it, depending on the chosen 911 error level setting. 
912 """ 913 token = token or self._curr or self._prev or Token.string("") 914 start = token.start 915 end = token.end + 1 916 start_context = self.sql[max(start - self.error_message_context, 0) : start] 917 highlight = self.sql[start:end] 918 end_context = self.sql[end : end + self.error_message_context] 919 920 error = ParseError.new( 921 f"{message}. Line {token.line}, Col: {token.col}.\n" 922 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 923 description=message, 924 line=token.line, 925 col=token.col, 926 start_context=start_context, 927 highlight=highlight, 928 end_context=end_context, 929 ) 930 931 if self.error_level == ErrorLevel.IMMEDIATE: 932 raise error 933 934 self.errors.append(error) 935 936 def expression( 937 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 938 ) -> E: 939 """ 940 Creates a new, validated Expression. 941 942 Args: 943 exp_class: the expression class to instantiate. 944 comments: an optional list of comments to attach to the expression. 945 kwargs: the arguments to set for the expression along with their respective values. 946 947 Returns: 948 The target expression. 949 """ 950 instance = exp_class(**kwargs) 951 instance.add_comments(comments) if comments else self._add_comments(instance) 952 self.validate_expression(instance) 953 return instance 954 955 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 956 if expression and self._prev_comments: 957 expression.add_comments(self._prev_comments) 958 self._prev_comments = None 959 960 def validate_expression( 961 self, expression: exp.Expression, args: t.Optional[t.List] = None 962 ) -> None: 963 """ 964 Validates an already instantiated expression, making sure that all its mandatory arguments 965 are set. 966 967 Args: 968 expression: the expression to validate. 969 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor (negative `times` moves backward) and
        # refreshes the _curr/_next/_prev convenience pointers.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds the cursor back to `index`; no-op if already there.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback path: wrap the previous token's text and the remaining
        # input into an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT [IF EXISTS] ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown object kind: fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL item is an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching on the current token."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement keyword: try a bare expression first,
        # then a SELECT, and apply query modifiers / set operations on top.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses a DROP statement; falls back to a Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; the `and` chain short-circuits so tokens
        # are only consumed while the sequence keeps matching.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parses a CREATE [OR REPLACE] <kind> statement into an exp.Create node."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...) — consume TABLE so FUNCTION is the kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        # NOTE(review): the "merge temp_properties into properties" pattern
        # below repeats several times — a candidate for a private helper.
        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the leading modifier keywords; only truthy ones are
        # forwarded to the property parser below.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser does not accept one of the modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/view property; returns None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment (key may be a var or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        """Parses a STORED AS file-format property (Hive-style)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Accepts `= value` as well as `AS value` forms.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
        """Parses consecutive properties into an exp.Properties node, or None."""
        properties = []

        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE right after CREATE [OR REPLACE | UNIQUE] is a Teradata-style
        # table property; elsewhere it is a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses a WITH (...) / WITH JOURNAL / WITH [NO] DATA / isolated-loading property."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.Expression:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.Expression:
        self._match(TokenType.EQ)

        # `on` stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("BY"):
            # CLUSTER without BY: give back the CLUSTER token.
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        return self.expression(
            exp.MergeBlockRatioProperty,
            no=no,
            default=default,
        )

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.Expression:
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.Expression:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses a LOCKING property: object kind, target, FOR/IN and lock type."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others do.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.Expression:
        # `statistics` stays None when no AND [NO] STATISTICS suffix follows.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.Property]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Property]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parses CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                # INCLUDING/EXCLUDING without an option name: bail out.
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parses a RETURNS clause: either a TABLE schema or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<...> (generic-style schema).
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses an INSERT statement, including INSERT ... DIRECTORY variants."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / IGNORE, per INSERT_ALTERNATIVES.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses ON CONFLICT / ON DUPLICATE KEY clauses of an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parses a Hive ROW FORMAT SERDE / DELIMITED clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.Expression:
        """Parses LOAD DATA [LOCAL] INPATH ...; other LOAD forms become a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and
            self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses a CACHE [LAZY] TABLE ... [OPTIONS(k = v)] [AS select] statement."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a [key, value] pair.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parses one VALUES row — parenthesized or a bare single expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT-like query: WITH-prefixed statements, SELECT proper,
        parenthesized subqueries (when nested/table), and VALUES clauses.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parses a WITH [RECURSIVE] clause with one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parses an optional [AS] alias with an optional column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Empty parens weren't a column list; rewind past the L_PAREN.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Applies each registered modifier parser (WHERE, GROUP BY, ...) in
        # order, attaching whatever it yields onto the modifiable node.
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parses a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize node."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by scanning to the balancing
            # right paren and slicing the original SQL text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
self._parse_csv( 2078 lambda: self.expression( 2079 exp.Alias, 2080 alias=self._parse_id_var(any_token=True), 2081 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2082 ) 2083 ) 2084 if self._match_text_seq("DEFINE") 2085 else None 2086 ) 2087 2088 self._match_r_paren() 2089 2090 return self.expression( 2091 exp.MatchRecognize, 2092 partition_by=partition, 2093 order=order, 2094 measures=measures, 2095 rows=rows, 2096 after=after, 2097 pattern=pattern, 2098 define=define, 2099 alias=self._parse_table_alias(), 2100 ) 2101 2102 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2103 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2104 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2105 2106 if outer_apply or cross_apply: 2107 this = self._parse_select(table=True) 2108 view = None 2109 outer = not cross_apply 2110 elif self._match(TokenType.LATERAL): 2111 this = self._parse_select(table=True) 2112 view = self._match(TokenType.VIEW) 2113 outer = self._match(TokenType.OUTER) 2114 else: 2115 return None 2116 2117 if not this: 2118 this = self._parse_function() or self._parse_id_var(any_token=False) 2119 while self._match(TokenType.DOT): 2120 this = exp.Dot( 2121 this=this, 2122 expression=self._parse_function() or self._parse_id_var(any_token=False), 2123 ) 2124 2125 table_alias: t.Optional[exp.Expression] 2126 2127 if view: 2128 table = self._parse_id_var(any_token=False) 2129 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2130 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2131 else: 2132 table_alias = self._parse_table_alias() 2133 2134 expression = self.expression( 2135 exp.Lateral, 2136 this=this, 2137 view=view, 2138 outer=outer, 2139 alias=table_alias, 2140 ) 2141 2142 return expression 2143 2144 def _parse_join_side_and_kind( 2145 self, 2146 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2147 return ( 2148 
self._match(TokenType.NATURAL) and self._prev, 2149 self._match_set(self.JOIN_SIDES) and self._prev, 2150 self._match_set(self.JOIN_KINDS) and self._prev, 2151 ) 2152 2153 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2154 if self._match(TokenType.COMMA): 2155 return self.expression(exp.Join, this=self._parse_table()) 2156 2157 index = self._index 2158 natural, side, kind = self._parse_join_side_and_kind() 2159 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2160 join = self._match(TokenType.JOIN) 2161 2162 if not skip_join_token and not join: 2163 self._retreat(index) 2164 kind = None 2165 natural = None 2166 side = None 2167 2168 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2169 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2170 2171 if not skip_join_token and not join and not outer_apply and not cross_apply: 2172 return None 2173 2174 if outer_apply: 2175 side = Token(TokenType.LEFT, "LEFT") 2176 2177 kwargs: t.Dict[ 2178 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2179 ] = {"this": self._parse_table()} 2180 2181 if natural: 2182 kwargs["natural"] = True 2183 if side: 2184 kwargs["side"] = side.text 2185 if kind: 2186 kwargs["kind"] = kind.text 2187 if hint: 2188 kwargs["hint"] = hint 2189 2190 if self._match(TokenType.ON): 2191 kwargs["on"] = self._parse_conjunction() 2192 elif self._match(TokenType.USING): 2193 kwargs["using"] = self._parse_wrapped_id_vars() 2194 2195 return self.expression(exp.Join, **kwargs) # type: ignore 2196 2197 def _parse_index( 2198 self, 2199 index: t.Optional[exp.Expression] = None, 2200 ) -> t.Optional[exp.Expression]: 2201 if index: 2202 unique = None 2203 primary = None 2204 amp = None 2205 2206 self._match(TokenType.ON) 2207 self._match(TokenType.TABLE) # hive 2208 table = self._parse_table_parts(schema=True) 2209 else: 2210 unique = self._match(TokenType.UNIQUE) 2211 primary = 
self._match_text_seq("PRIMARY") 2212 amp = self._match_text_seq("AMP") 2213 if not self._match(TokenType.INDEX): 2214 return None 2215 index = self._parse_id_var() 2216 table = None 2217 2218 if self._match(TokenType.L_PAREN, advance=False): 2219 columns = self._parse_wrapped_csv(self._parse_ordered) 2220 else: 2221 columns = None 2222 2223 return self.expression( 2224 exp.Index, 2225 this=index, 2226 table=table, 2227 columns=columns, 2228 unique=unique, 2229 primary=primary, 2230 amp=amp, 2231 ) 2232 2233 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2234 return ( 2235 (not schema and self._parse_function()) 2236 or self._parse_id_var(any_token=False) 2237 or self._parse_string_as_identifier() 2238 or self._parse_placeholder() 2239 ) 2240 2241 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2242 catalog = None 2243 db = None 2244 table = self._parse_table_part(schema=schema) 2245 2246 while self._match(TokenType.DOT): 2247 if catalog: 2248 # This allows nesting the table in arbitrarily many dot expressions if needed 2249 table = self.expression( 2250 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2251 ) 2252 else: 2253 catalog = db 2254 db = table 2255 table = self._parse_table_part(schema=schema) 2256 2257 if not table: 2258 self.raise_error(f"Expected table name but got {self._curr}") 2259 2260 return self.expression( 2261 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2262 ) 2263 2264 def _parse_table( 2265 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2266 ) -> t.Optional[exp.Expression]: 2267 lateral = self._parse_lateral() 2268 if lateral: 2269 return lateral 2270 2271 unnest = self._parse_unnest() 2272 if unnest: 2273 return unnest 2274 2275 values = self._parse_derived_table_values() 2276 if values: 2277 return values 2278 2279 subquery = self._parse_select(table=True) 2280 if subquery: 2281 if not 
subquery.args.get("pivots"): 2282 subquery.set("pivots", self._parse_pivots()) 2283 return subquery 2284 2285 this: exp.Expression = self._parse_table_parts(schema=schema) 2286 2287 if schema: 2288 return self._parse_schema(this=this) 2289 2290 if self.alias_post_tablesample: 2291 table_sample = self._parse_table_sample() 2292 2293 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2294 if alias: 2295 this.set("alias", alias) 2296 2297 if not this.args.get("pivots"): 2298 this.set("pivots", self._parse_pivots()) 2299 2300 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2301 this.set( 2302 "hints", 2303 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2304 ) 2305 self._match_r_paren() 2306 2307 if not self.alias_post_tablesample: 2308 table_sample = self._parse_table_sample() 2309 2310 if table_sample: 2311 table_sample.set("this", this) 2312 this = table_sample 2313 2314 return this 2315 2316 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2317 if not self._match(TokenType.UNNEST): 2318 return None 2319 2320 expressions = self._parse_wrapped_csv(self._parse_type) 2321 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2322 alias = self._parse_table_alias() 2323 2324 if alias and self.unnest_column_only: 2325 if alias.args.get("columns"): 2326 self.raise_error("Unexpected extra column alias in unnest.") 2327 alias.set("columns", [alias.this]) 2328 alias.set("this", None) 2329 2330 offset = None 2331 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2332 self._match(TokenType.ALIAS) 2333 offset = self._parse_id_var() or exp.Identifier(this="offset") 2334 2335 return self.expression( 2336 exp.Unnest, 2337 expressions=expressions, 2338 ordinality=ordinality, 2339 alias=alias, 2340 offset=offset, 2341 ) 2342 2343 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2344 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2345 if not 
is_derived and not self._match(TokenType.VALUES): 2346 return None 2347 2348 expressions = self._parse_csv(self._parse_value) 2349 2350 if is_derived: 2351 self._match_r_paren() 2352 2353 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2354 2355 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2356 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2357 as_modifier and self._match_text_seq("USING", "SAMPLE") 2358 ): 2359 return None 2360 2361 bucket_numerator = None 2362 bucket_denominator = None 2363 bucket_field = None 2364 percent = None 2365 rows = None 2366 size = None 2367 seed = None 2368 2369 kind = ( 2370 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2371 ) 2372 method = self._parse_var(tokens=(TokenType.ROW,)) 2373 2374 self._match(TokenType.L_PAREN) 2375 2376 num = self._parse_number() 2377 2378 if self._match_text_seq("BUCKET"): 2379 bucket_numerator = self._parse_number() 2380 self._match_text_seq("OUT", "OF") 2381 bucket_denominator = bucket_denominator = self._parse_number() 2382 self._match(TokenType.ON) 2383 bucket_field = self._parse_field() 2384 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2385 percent = num 2386 elif self._match(TokenType.ROWS): 2387 rows = num 2388 else: 2389 size = num 2390 2391 self._match(TokenType.R_PAREN) 2392 2393 if self._match(TokenType.L_PAREN): 2394 method = self._parse_var() 2395 seed = self._match(TokenType.COMMA) and self._parse_number() 2396 self._match_r_paren() 2397 elif self._match_texts(("SEED", "REPEATABLE")): 2398 seed = self._parse_wrapped(self._parse_number) 2399 2400 return self.expression( 2401 exp.TableSample, 2402 method=method, 2403 bucket_numerator=bucket_numerator, 2404 bucket_denominator=bucket_denominator, 2405 bucket_field=bucket_field, 2406 percent=percent, 2407 rows=rows, 2408 size=size, 2409 seed=seed, 2410 kind=kind, 2411 ) 2412 2413 def 
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause into an `exp.Pivot` node.

        Backtracks (returns None and restores the token index) if the keyword is
        not followed by a parenthesized body. Raises via `raise_error` on a
        malformed body (missing aggregations, FOR or IN).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT without a body - treat the keyword as something else.
            self._retreat(index)
            return None

        if unpivot:
            # UNPIVOT lists plain columns; PIVOT lists (aliased) aggregations.
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot, combining
            # each IN-list value with each aggregation name (dialect-dependent
            # ordering/prefixing via PREFIXED_PIVOT_COLUMNS / IDENTIFY_PIVOT_STRINGS).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
in aggregations] 2478 2479 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2480 if not skip_where_token and not self._match(TokenType.WHERE): 2481 return None 2482 2483 return self.expression( 2484 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2485 ) 2486 2487 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2488 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2489 return None 2490 2491 elements = defaultdict(list) 2492 2493 while True: 2494 expressions = self._parse_csv(self._parse_conjunction) 2495 if expressions: 2496 elements["expressions"].extend(expressions) 2497 2498 grouping_sets = self._parse_grouping_sets() 2499 if grouping_sets: 2500 elements["grouping_sets"].extend(grouping_sets) 2501 2502 rollup = None 2503 cube = None 2504 totals = None 2505 2506 with_ = self._match(TokenType.WITH) 2507 if self._match(TokenType.ROLLUP): 2508 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2509 elements["rollup"].extend(ensure_list(rollup)) 2510 2511 if self._match(TokenType.CUBE): 2512 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2513 elements["cube"].extend(ensure_list(cube)) 2514 2515 if self._match_text_seq("TOTALS"): 2516 totals = True 2517 elements["totals"] = True # type: ignore 2518 2519 if not (grouping_sets or rollup or cube or totals): 2520 break 2521 2522 return self.expression(exp.Group, **elements) # type: ignore 2523 2524 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2525 if not self._match(TokenType.GROUPING_SETS): 2526 return None 2527 2528 return self._parse_wrapped_csv(self._parse_grouping_set) 2529 2530 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2531 if self._match(TokenType.L_PAREN): 2532 grouping_set = self._parse_csv(self._parse_column) 2533 self._match_r_paren() 2534 return self.expression(exp.Tuple, expressions=grouping_set) 2535 2536 return 
self._parse_column() 2537 2538 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2539 if not skip_having_token and not self._match(TokenType.HAVING): 2540 return None 2541 return self.expression(exp.Having, this=self._parse_conjunction()) 2542 2543 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2544 if not self._match(TokenType.QUALIFY): 2545 return None 2546 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2547 2548 def _parse_order( 2549 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2550 ) -> t.Optional[exp.Expression]: 2551 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2552 return this 2553 2554 return self.expression( 2555 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2556 ) 2557 2558 def _parse_sort( 2559 self, exp_class: t.Type[exp.Expression], *texts: str 2560 ) -> t.Optional[exp.Expression]: 2561 if not self._match_text_seq(*texts): 2562 return None 2563 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2564 2565 def _parse_ordered(self) -> exp.Expression: 2566 this = self._parse_conjunction() 2567 self._match(TokenType.ASC) 2568 is_desc = self._match(TokenType.DESC) 2569 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2570 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2571 desc = is_desc or False 2572 asc = not desc 2573 nulls_first = is_nulls_first or False 2574 explicitly_null_ordered = is_nulls_first or is_nulls_last 2575 if ( 2576 not explicitly_null_ordered 2577 and ( 2578 (asc and self.null_ordering == "nulls_are_small") 2579 or (desc and self.null_ordering != "nulls_are_small") 2580 ) 2581 and self.null_ordering != "nulls_are_last" 2582 ): 2583 nulls_first = True 2584 2585 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2586 2587 def _parse_limit( 2588 self, this: t.Optional[exp.Expression] = None, top: bool = False 2589 ) -> 
t.Optional[exp.Expression]: 2590 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2591 limit_paren = self._match(TokenType.L_PAREN) 2592 limit_exp = self.expression( 2593 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2594 ) 2595 2596 if limit_paren: 2597 self._match_r_paren() 2598 2599 return limit_exp 2600 2601 if self._match(TokenType.FETCH): 2602 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2603 direction = self._prev.text if direction else "FIRST" 2604 2605 count = self._parse_number() 2606 percent = self._match(TokenType.PERCENT) 2607 2608 self._match_set((TokenType.ROW, TokenType.ROWS)) 2609 2610 only = self._match_text_seq("ONLY") 2611 with_ties = self._match_text_seq("WITH", "TIES") 2612 2613 if only and with_ties: 2614 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2615 2616 return self.expression( 2617 exp.Fetch, 2618 direction=direction, 2619 count=count, 2620 percent=percent, 2621 with_ties=with_ties, 2622 ) 2623 2624 return this 2625 2626 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2627 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2628 return this 2629 2630 count = self._parse_number() 2631 self._match_set((TokenType.ROW, TokenType.ROWS)) 2632 return self.expression(exp.Offset, this=this, expression=count) 2633 2634 def _parse_locks(self) -> t.List[exp.Expression]: 2635 # Lists are invariant, so we need to use a type hint here 2636 locks: t.List[exp.Expression] = [] 2637 2638 while True: 2639 if self._match_text_seq("FOR", "UPDATE"): 2640 update = True 2641 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2642 "LOCK", "IN", "SHARE", "MODE" 2643 ): 2644 update = False 2645 else: 2646 break 2647 2648 expressions = None 2649 if self._match_text_seq("OF"): 2650 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2651 2652 wait: t.Optional[bool | exp.Expression] 
= None 2653 if self._match_text_seq("NOWAIT"): 2654 wait = True 2655 elif self._match_text_seq("WAIT"): 2656 wait = self._parse_primary() 2657 elif self._match_text_seq("SKIP", "LOCKED"): 2658 wait = False 2659 2660 locks.append( 2661 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2662 ) 2663 2664 return locks 2665 2666 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2667 if not self._match_set(self.SET_OPERATIONS): 2668 return this 2669 2670 token_type = self._prev.token_type 2671 2672 if token_type == TokenType.UNION: 2673 expression = exp.Union 2674 elif token_type == TokenType.EXCEPT: 2675 expression = exp.Except 2676 else: 2677 expression = exp.Intersect 2678 2679 return self.expression( 2680 expression, 2681 this=this, 2682 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2683 expression=self._parse_set_operations(self._parse_select(nested=True)), 2684 ) 2685 2686 def _parse_expression(self) -> t.Optional[exp.Expression]: 2687 return self._parse_alias(self._parse_conjunction()) 2688 2689 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2690 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2691 2692 def _parse_equality(self) -> t.Optional[exp.Expression]: 2693 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2694 2695 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2696 return self._parse_tokens(self._parse_range, self.COMPARISON) 2697 2698 def _parse_range(self) -> t.Optional[exp.Expression]: 2699 this = self._parse_bitwise() 2700 negate = self._match(TokenType.NOT) 2701 2702 if self._match_set(self.RANGE_PARSERS): 2703 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2704 if not expression: 2705 return this 2706 2707 this = expression 2708 elif self._match(TokenType.ISNULL): 2709 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2710 2711 # Postgres supports ISNULL 
and NOTNULL for conditions. 2712 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2713 if self._match(TokenType.NOTNULL): 2714 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2715 this = self.expression(exp.Not, this=this) 2716 2717 if negate: 2718 this = self.expression(exp.Not, this=this) 2719 2720 if self._match(TokenType.IS): 2721 this = self._parse_is(this) 2722 2723 return this 2724 2725 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2726 index = self._index - 1 2727 negate = self._match(TokenType.NOT) 2728 if self._match_text_seq("DISTINCT", "FROM"): 2729 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2730 return self.expression(klass, this=this, expression=self._parse_expression()) 2731 2732 expression = self._parse_null() or self._parse_boolean() 2733 if not expression: 2734 self._retreat(index) 2735 return None 2736 2737 this = self.expression(exp.Is, this=this, expression=expression) 2738 return self.expression(exp.Not, this=this) if negate else this 2739 2740 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression: 2741 unnest = self._parse_unnest() 2742 if unnest: 2743 this = self.expression(exp.In, this=this, unnest=unnest) 2744 elif self._match(TokenType.L_PAREN): 2745 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2746 2747 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2748 this = self.expression(exp.In, this=this, query=expressions[0]) 2749 else: 2750 this = self.expression(exp.In, this=this, expressions=expressions) 2751 2752 self._match_r_paren(this) 2753 else: 2754 this = self.expression(exp.In, this=this, field=self._parse_field()) 2755 2756 return this 2757 2758 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2759 low = self._parse_bitwise() 2760 self._match(TokenType.AND) 2761 high = self._parse_bitwise() 2762 return self.expression(exp.Between, 
this=this, low=low, high=high) 2763 2764 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2765 if not self._match(TokenType.ESCAPE): 2766 return this 2767 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2768 2769 def _parse_interval(self) -> t.Optional[exp.Expression]: 2770 if not self._match(TokenType.INTERVAL): 2771 return None 2772 2773 this = self._parse_primary() or self._parse_term() 2774 unit = self._parse_function() or self._parse_var() 2775 2776 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2777 # each INTERVAL expression into this canonical form so it's easy to transpile 2778 if this and isinstance(this, exp.Literal): 2779 if this.is_number: 2780 this = exp.Literal.string(this.name) 2781 2782 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2783 parts = this.name.split() 2784 if not unit and len(parts) <= 2: 2785 this = exp.Literal.string(seq_get(parts, 0)) 2786 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2787 2788 return self.expression(exp.Interval, this=this, unit=unit) 2789 2790 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2791 this = self._parse_term() 2792 2793 while True: 2794 if self._match_set(self.BITWISE): 2795 this = self.expression( 2796 self.BITWISE[self._prev.token_type], 2797 this=this, 2798 expression=self._parse_term(), 2799 ) 2800 elif self._match_pair(TokenType.LT, TokenType.LT): 2801 this = self.expression( 2802 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2803 ) 2804 elif self._match_pair(TokenType.GT, TokenType.GT): 2805 this = self.expression( 2806 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2807 ) 2808 else: 2809 break 2810 2811 return this 2812 2813 def _parse_term(self) -> t.Optional[exp.Expression]: 2814 return self._parse_tokens(self._parse_factor, self.TERM) 2815 2816 def _parse_factor(self) -> t.Optional[exp.Expression]: 2817 
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Dispatch unary prefix operators to their registered parsers; otherwise
        # parse a typed expression with an optional AT TIME ZONE tail.
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a typed literal/cast (e.g. DATE '...'), or a column.

        Backtracks when a bare type name turns out to be an identifier.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'; a dialect may register a
                # dedicated parser for the type, otherwise emit a plain CAST.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was likely an
                # identifier after all - rewind and re-parse it as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        # One size/precision argument of a parameterized type, e.g. the 10 in
        # VARCHAR(10), with an optional trailing unit variable.
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
2880 2881 if not expressions or not self._match(TokenType.R_PAREN): 2882 self._retreat(index) 2883 return None 2884 2885 maybe_func = True 2886 2887 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2888 this = exp.DataType( 2889 this=exp.DataType.Type.ARRAY, 2890 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2891 nested=True, 2892 ) 2893 2894 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2895 this = exp.DataType( 2896 this=exp.DataType.Type.ARRAY, 2897 expressions=[this], 2898 nested=True, 2899 ) 2900 2901 return this 2902 2903 if self._match(TokenType.L_BRACKET): 2904 self._retreat(index) 2905 return None 2906 2907 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2908 if nested and self._match(TokenType.LT): 2909 if is_struct: 2910 expressions = self._parse_csv(self._parse_struct_types) 2911 else: 2912 expressions = self._parse_csv(self._parse_types) 2913 2914 if not self._match(TokenType.GT): 2915 self.raise_error("Expecting >") 2916 2917 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2918 values = self._parse_csv(self._parse_conjunction) 2919 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2920 2921 value: t.Optional[exp.Expression] = None 2922 if type_token in self.TIMESTAMPS: 2923 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2924 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2925 elif ( 2926 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2927 or type_token == TokenType.TIMESTAMPLTZ 2928 ): 2929 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2930 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2931 if type_token == TokenType.TIME: 2932 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2933 else: 2934 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2935 2936 maybe_func = 
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One STRUCT member: `name: type` (the colon is optional in some dialects).
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Attach an optional `AT TIME ZONE <zone>` suffix; `this` passes through
        # unchanged when the phrase is absent.
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column parts one slot: the previous column head becomes
                # the table, table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a decimal like `.5`, or a
        parenthesized expression / subquery / tuple. Returns None if the
        current tokens don't start a primary."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.25` -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions -> tuple literal.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder mapping overriding self.FUNCTIONS.
            anonymous: if True, skip the dedicated/known-function parsers and
                always produce an exp.Anonymous node.

        Returns:
            The parsed function expression (possibly wrapped in a window), or
            None if the current tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only niladic functions like CURRENT_DATE apply.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a character-set introducer (e.g. MySQL `_utf8'abc'`).

        Falls back to a plain identifier when no literal follows the introducer.
        """
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`) or fall back to an argument
        expression (DISTINCT list, select, or plain expression).

        Args:
            alias: whether aliases are allowed in the fallback expression
                (used for functions with aliased arguments).
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Inside function args, `name = value` is a named argument, so the
                # left side is a variable, not a column.
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.

        Returns `this` unchanged when what follows is a nested SELECT or when
        there is no opening parenthesis.
        """
        index = self._index

        try:
            # Probe for a nested select; always rewind afterwards.
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition (type and constraints) for identifier `this`."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Bare identifier: no column definition to build.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint, with either a wrapped list or a
        single expression argument."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY with optional
        sequence options (START WITH, INCREMENT BY, MINVALUE, MAXVALUE, CYCLE)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column, not IDENTITY.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, or None."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed constraints are delegated to
        _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` (defaults to all known
        CONSTRAINT_PARSERS) and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a column constraint or as a table-level
        constraint with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (ON <event> <action>, NOT ENFORCED,
        DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event word after ON (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause: target table, optional column list, options.

        Args:
            match: if True, require (and consume) the REFERENCES keyword first.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint body: column list, REFERENCES clause,
        and ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE/RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint (with optional ASC/DESC) or
        a table-level constraint with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing `[...]` subscripts or `{...}` struct literals on `this`.

        Recurses so chained subscripts like a[1][2] are handled.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:3].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize the subscript to the dialect's array index base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows (e.g. x[1:3]); otherwise return it."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN branches,
        optional ELSE, required END."""
        ifs = []
        default = None

        # Optional operand for the "simple CASE" form: CASE <expr> WHEN ...
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(cond, a, b) or as the
        statement form IF cond THEN a [ELSE b] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF statement after all; rewind past the IF keyword.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(expr AS type).

        Args:
            strict: build exp.Cast when True, exp.TryCast when False.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to the type named by a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including postgres'
        ORDER BY inside the call and the WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type).

        Args:
            strict: build exp.Cast when True, exp.TryCast when False.
        """
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired value is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime, so
                # also treat NULL == NULL as a match (DECODE semantics).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse a JSON_OBJECT entry: [KEY] key [:|VALUE] value."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS,
        RETURNING, FORMAT JSON and ENCODING modifiers."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG/LN calls, honoring the dialect's argument order and
        whether a single-argument LOG defaults to natural log."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL's MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls, in both the `substr IN haystack`
        and comma-separated argument forms.

        Args:
            haystack_first: whether the dialect passes the haystack as the
                first comma-separated argument.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into an exp.JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this
    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: the first expression was the characters to trim.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition: name AS (window spec)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes on `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and an OVER (...) specification.

        Args:
            alias: when True, parse a named-window definition (BigQuery's
                `WINDOW x AS (...)`) instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (a reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / expression,
        with an optional PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or parenthesized alias list) for `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
self._match(TokenType.L_PAREN): 3948 aliases = self.expression( 3949 exp.Aliases, 3950 this=this, 3951 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3952 ) 3953 self._match_r_paren(aliases) 3954 return aliases 3955 3956 alias = self._parse_id_var(any_token) 3957 3958 if alias: 3959 return self.expression(exp.Alias, this=this, alias=alias) 3960 3961 return this 3962 3963 def _parse_id_var( 3964 self, 3965 any_token: bool = True, 3966 tokens: t.Optional[t.Collection[TokenType]] = None, 3967 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3968 ) -> t.Optional[exp.Expression]: 3969 identifier = self._parse_identifier() 3970 3971 if identifier: 3972 return identifier 3973 3974 prefix = "" 3975 3976 if prefix_tokens: 3977 while self._match_set(prefix_tokens): 3978 prefix += self._prev.text 3979 3980 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3981 quoted = self._prev.token_type == TokenType.STRING 3982 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3983 3984 return None 3985 3986 def _parse_string(self) -> t.Optional[exp.Expression]: 3987 if self._match(TokenType.STRING): 3988 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3989 return self._parse_placeholder() 3990 3991 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 3992 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 3993 3994 def _parse_number(self) -> t.Optional[exp.Expression]: 3995 if self._match(TokenType.NUMBER): 3996 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3997 return self._parse_placeholder() 3998 3999 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4000 if self._match(TokenType.IDENTIFIER): 4001 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4002 return self._parse_placeholder() 4003 4004 def _parse_var( 4005 self, any_token: bool = False, tokens: 
t.Optional[t.Collection[TokenType]] = None 4006 ) -> t.Optional[exp.Expression]: 4007 if ( 4008 (any_token and self._advance_any()) 4009 or self._match(TokenType.VAR) 4010 or (self._match_set(tokens) if tokens else False) 4011 ): 4012 return self.expression(exp.Var, this=self._prev.text) 4013 return self._parse_placeholder() 4014 4015 def _advance_any(self) -> t.Optional[Token]: 4016 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4017 self._advance() 4018 return self._prev 4019 return None 4020 4021 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4022 return self._parse_var() or self._parse_string() 4023 4024 def _parse_null(self) -> t.Optional[exp.Expression]: 4025 if self._match(TokenType.NULL): 4026 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4027 return None 4028 4029 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4030 if self._match(TokenType.TRUE): 4031 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4032 if self._match(TokenType.FALSE): 4033 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4034 return None 4035 4036 def _parse_star(self) -> t.Optional[exp.Expression]: 4037 if self._match(TokenType.STAR): 4038 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4039 return None 4040 4041 def _parse_parameter(self) -> exp.Expression: 4042 wrapped = self._match(TokenType.L_BRACE) 4043 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4044 self._match(TokenType.R_BRACE) 4045 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4046 4047 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4048 if self._match_set(self.PLACEHOLDER_PARSERS): 4049 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4050 if placeholder: 4051 return placeholder 4052 self._advance(-1) 4053 return None 4054 4055 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4056 if not 
self._match(TokenType.EXCEPT): 4057 return None 4058 if self._match(TokenType.L_PAREN, advance=False): 4059 return self._parse_wrapped_csv(self._parse_column) 4060 return self._parse_csv(self._parse_column) 4061 4062 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4063 if not self._match(TokenType.REPLACE): 4064 return None 4065 if self._match(TokenType.L_PAREN, advance=False): 4066 return self._parse_wrapped_csv(self._parse_expression) 4067 return self._parse_csv(self._parse_expression) 4068 4069 def _parse_csv( 4070 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4071 ) -> t.List[t.Optional[exp.Expression]]: 4072 parse_result = parse_method() 4073 items = [parse_result] if parse_result is not None else [] 4074 4075 while self._match(sep): 4076 self._add_comments(parse_result) 4077 parse_result = parse_method() 4078 if parse_result is not None: 4079 items.append(parse_result) 4080 4081 return items 4082 4083 def _parse_tokens( 4084 self, parse_method: t.Callable, expressions: t.Dict 4085 ) -> t.Optional[exp.Expression]: 4086 this = parse_method() 4087 4088 while self._match_set(expressions): 4089 this = self.expression( 4090 expressions[self._prev.token_type], 4091 this=this, 4092 comments=self._prev_comments, 4093 expression=parse_method(), 4094 ) 4095 4096 return this 4097 4098 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4099 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4100 4101 def _parse_wrapped_csv( 4102 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4103 ) -> t.List[t.Optional[exp.Expression]]: 4104 return self._parse_wrapped( 4105 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4106 ) 4107 4108 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4109 wrapped = self._match(TokenType.L_PAREN) 4110 if not wrapped and not optional: 4111 
self.raise_error("Expecting (") 4112 parse_result = parse_method() 4113 if wrapped: 4114 self._match_r_paren() 4115 return parse_result 4116 4117 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4118 return self._parse_select() or self._parse_set_operations( 4119 self._parse_expression() if alias else self._parse_conjunction() 4120 ) 4121 4122 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4123 return self._parse_set_operations( 4124 self._parse_select(nested=True, parse_subquery_alias=False) 4125 ) 4126 4127 def _parse_transaction(self) -> exp.Expression: 4128 this = None 4129 if self._match_texts(self.TRANSACTION_KIND): 4130 this = self._prev.text 4131 4132 self._match_texts({"TRANSACTION", "WORK"}) 4133 4134 modes = [] 4135 while True: 4136 mode = [] 4137 while self._match(TokenType.VAR): 4138 mode.append(self._prev.text) 4139 4140 if mode: 4141 modes.append(" ".join(mode)) 4142 if not self._match(TokenType.COMMA): 4143 break 4144 4145 return self.expression(exp.Transaction, this=this, modes=modes) 4146 4147 def _parse_commit_or_rollback(self) -> exp.Expression: 4148 chain = None 4149 savepoint = None 4150 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4151 4152 self._match_texts({"TRANSACTION", "WORK"}) 4153 4154 if self._match_text_seq("TO"): 4155 self._match_text_seq("SAVEPOINT") 4156 savepoint = self._parse_id_var() 4157 4158 if self._match(TokenType.AND): 4159 chain = not self._match_text_seq("NO") 4160 self._match_text_seq("CHAIN") 4161 4162 if is_rollback: 4163 return self.expression(exp.Rollback, savepoint=savepoint) 4164 return self.expression(exp.Commit, chain=chain) 4165 4166 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4167 if not self._match_text_seq("ADD"): 4168 return None 4169 4170 self._match(TokenType.COLUMN) 4171 exists_column = self._parse_exists(not_=True) 4172 expression = self._parse_column_def(self._parse_field(any_token=True)) 4173 4174 if expression: 4175 
expression.set("exists", exists_column) 4176 4177 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4178 if self._match_texts(("FIRST", "AFTER")): 4179 position = self._prev.text 4180 column_position = self.expression( 4181 exp.ColumnPosition, this=self._parse_column(), position=position 4182 ) 4183 expression.set("position", column_position) 4184 4185 return expression 4186 4187 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4188 drop = self._match(TokenType.DROP) and self._parse_drop() 4189 if drop and not isinstance(drop, exp.Command): 4190 drop.set("kind", drop.args.get("kind", "COLUMN")) 4191 return drop 4192 4193 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4194 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4195 return self.expression( 4196 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4197 ) 4198 4199 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4200 this = None 4201 kind = self._prev.token_type 4202 4203 if kind == TokenType.CONSTRAINT: 4204 this = self._parse_id_var() 4205 4206 if self._match_text_seq("CHECK"): 4207 expression = self._parse_wrapped(self._parse_conjunction) 4208 enforced = self._match_text_seq("ENFORCED") 4209 4210 return self.expression( 4211 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4212 ) 4213 4214 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4215 expression = self._parse_foreign_key() 4216 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4217 expression = self._parse_primary_key() 4218 else: 4219 expression = None 4220 4221 return self.expression(exp.AddConstraint, this=this, expression=expression) 4222 4223 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4224 index = self._index - 1 4225 4226 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4227 
return self._parse_csv(self._parse_add_constraint) 4228 4229 self._retreat(index) 4230 return self._parse_csv(self._parse_add_column) 4231 4232 def _parse_alter_table_alter(self) -> exp.Expression: 4233 self._match(TokenType.COLUMN) 4234 column = self._parse_field(any_token=True) 4235 4236 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4237 return self.expression(exp.AlterColumn, this=column, drop=True) 4238 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4239 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4240 4241 self._match_text_seq("SET", "DATA") 4242 return self.expression( 4243 exp.AlterColumn, 4244 this=column, 4245 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4246 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4247 using=self._match(TokenType.USING) and self._parse_conjunction(), 4248 ) 4249 4250 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4251 index = self._index - 1 4252 4253 partition_exists = self._parse_exists() 4254 if self._match(TokenType.PARTITION, advance=False): 4255 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4256 4257 self._retreat(index) 4258 return self._parse_csv(self._parse_drop_column) 4259 4260 def _parse_alter_table_rename(self) -> exp.Expression: 4261 self._match_text_seq("TO") 4262 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4263 4264 def _parse_alter(self) -> t.Optional[exp.Expression]: 4265 start = self._prev 4266 4267 if not self._match(TokenType.TABLE): 4268 return self._parse_as_command(start) 4269 4270 exists = self._parse_exists() 4271 this = self._parse_table(schema=True) 4272 4273 if self._next: 4274 self._advance() 4275 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4276 4277 if parser: 4278 actions = ensure_list(parser(self)) 4279 4280 if not self._curr: 4281 return self.expression( 4282 exp.AlterTable, 
4283 this=this, 4284 exists=exists, 4285 actions=actions, 4286 ) 4287 return self._parse_as_command(start) 4288 4289 def _parse_merge(self) -> exp.Expression: 4290 self._match(TokenType.INTO) 4291 target = self._parse_table() 4292 4293 self._match(TokenType.USING) 4294 using = self._parse_table() 4295 4296 self._match(TokenType.ON) 4297 on = self._parse_conjunction() 4298 4299 whens = [] 4300 while self._match(TokenType.WHEN): 4301 matched = not self._match(TokenType.NOT) 4302 self._match_text_seq("MATCHED") 4303 source = ( 4304 False 4305 if self._match_text_seq("BY", "TARGET") 4306 else self._match_text_seq("BY", "SOURCE") 4307 ) 4308 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4309 4310 self._match(TokenType.THEN) 4311 4312 if self._match(TokenType.INSERT): 4313 _this = self._parse_star() 4314 if _this: 4315 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4316 else: 4317 then = self.expression( 4318 exp.Insert, 4319 this=self._parse_value(), 4320 expression=self._match(TokenType.VALUES) and self._parse_value(), 4321 ) 4322 elif self._match(TokenType.UPDATE): 4323 expressions = self._parse_star() 4324 if expressions: 4325 then = self.expression(exp.Update, expressions=expressions) 4326 else: 4327 then = self.expression( 4328 exp.Update, 4329 expressions=self._match(TokenType.SET) 4330 and self._parse_csv(self._parse_equality), 4331 ) 4332 elif self._match(TokenType.DELETE): 4333 then = self.expression(exp.Var, this=self._prev.text) 4334 else: 4335 then = None 4336 4337 whens.append( 4338 self.expression( 4339 exp.When, 4340 matched=matched, 4341 source=source, 4342 condition=condition, 4343 then=then, 4344 ) 4345 ) 4346 4347 return self.expression( 4348 exp.Merge, 4349 this=target, 4350 using=using, 4351 on=on, 4352 expressions=whens, 4353 ) 4354 4355 def _parse_show(self) -> t.Optional[exp.Expression]: 4356 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4357 if parser: 
4358 return parser(self) 4359 self._advance() 4360 return self.expression(exp.Show, this=self._prev.text.upper()) 4361 4362 def _parse_set_item_assignment( 4363 self, kind: t.Optional[str] = None 4364 ) -> t.Optional[exp.Expression]: 4365 index = self._index 4366 4367 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4368 return self._parse_set_transaction(global_=kind == "GLOBAL") 4369 4370 left = self._parse_primary() or self._parse_id_var() 4371 4372 if not self._match_texts(("=", "TO")): 4373 self._retreat(index) 4374 return None 4375 4376 right = self._parse_statement() or self._parse_id_var() 4377 this = self.expression( 4378 exp.EQ, 4379 this=left, 4380 expression=right, 4381 ) 4382 4383 return self.expression( 4384 exp.SetItem, 4385 this=this, 4386 kind=kind, 4387 ) 4388 4389 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4390 self._match_text_seq("TRANSACTION") 4391 characteristics = self._parse_csv( 4392 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4393 ) 4394 return self.expression( 4395 exp.SetItem, 4396 expressions=characteristics, 4397 kind="TRANSACTION", 4398 **{"global": global_}, # type: ignore 4399 ) 4400 4401 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4402 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4403 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4404 4405 def _parse_set(self) -> exp.Expression: 4406 index = self._index 4407 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4408 4409 if self._curr: 4410 self._retreat(index) 4411 return self._parse_as_command(self._prev) 4412 4413 return set_ 4414 4415 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4416 for option in options: 4417 if self._match_text_seq(*option.split(" ")): 4418 return exp.Var(this=option) 4419 return None 4420 4421 def _parse_as_command(self, 
start: Token) -> exp.Command: 4422 while self._curr: 4423 self._advance() 4424 text = self._find_sql(start, self._prev) 4425 size = len(start.text) 4426 return exp.Command(this=text[:size], expression=text[size:]) 4427 4428 def _find_parser( 4429 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4430 ) -> t.Optional[t.Callable]: 4431 if not self._curr: 4432 return None 4433 4434 index = self._index 4435 this = [] 4436 while True: 4437 # The current token might be multiple words 4438 curr = self._curr.text.upper() 4439 key = curr.split(" ") 4440 this.append(curr) 4441 self._advance() 4442 result, trie = in_trie(trie, key) 4443 if result == 0: 4444 break 4445 if result == 2: 4446 subparser = parsers[" ".join(this)] 4447 return subparser 4448 self._retreat(index) 4449 return None 4450 4451 def _match(self, token_type, advance=True, expression=None): 4452 if not self._curr: 4453 return None 4454 4455 if self._curr.token_type == token_type: 4456 if advance: 4457 self._advance() 4458 self._add_comments(expression) 4459 return True 4460 4461 return None 4462 4463 def _match_set(self, types, advance=True): 4464 if not self._curr: 4465 return None 4466 4467 if self._curr.token_type in types: 4468 if advance: 4469 self._advance() 4470 return True 4471 4472 return None 4473 4474 def _match_pair(self, token_type_a, token_type_b, advance=True): 4475 if not self._curr or not self._next: 4476 return None 4477 4478 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4479 if advance: 4480 self._advance(2) 4481 return True 4482 4483 return None 4484 4485 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4486 if not self._match(TokenType.L_PAREN, expression=expression): 4487 self.raise_error("Expecting (") 4488 4489 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4490 if not self._match(TokenType.R_PAREN, expression=expression): 4491 self.raise_error("Expecting )") 4492 4493 def 
_match_texts(self, texts, advance=True): 4494 if self._curr and self._curr.text.upper() in texts: 4495 if advance: 4496 self._advance() 4497 return True 4498 return False 4499 4500 def _match_text_seq(self, *texts, advance=True): 4501 index = self._index 4502 for text in texts: 4503 if self._curr and self._curr.text.upper() == text: 4504 self._advance() 4505 else: 4506 self._retreat(index) 4507 return False 4508 4509 if not advance: 4510 self._retreat(index) 4511 4512 return True 4513 4514 def _replace_columns_with_dots( 4515 self, this: t.Optional[exp.Expression] 4516 ) -> t.Optional[exp.Expression]: 4517 if isinstance(this, exp.Dot): 4518 exp.replace_children(this, self._replace_columns_with_dots) 4519 elif isinstance(this, exp.Column): 4520 exp.replace_children(this, self._replace_columns_with_dots) 4521 table = this.args.get("table") 4522 this = ( 4523 self.expression(exp.Dot, this=table, expression=this.this) 4524 if table 4525 else self.expression(exp.Var, this=this.name) 4526 ) 4527 elif isinstance(this, exp.Identifier): 4528 this = self.expression(exp.Var, this=this.name) 4529 4530 return this 4531 4532 def _replace_lambda( 4533 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4534 ) -> t.Optional[exp.Expression]: 4535 if not node: 4536 return node 4537 4538 for column in node.find_all(exp.Column): 4539 if column.parts[0].name in lambda_variables: 4540 dot_or_id = column.to_dot() if column.table else column.this 4541 parent = column.parent 4542 4543 while isinstance(parent, exp.Dot): 4544 if not isinstance(parent.parent, exp.Dot): 4545 parent.replace(dot_or_id) 4546 break 4547 parent = parent.parent 4548 else: 4549 if column is node: 4550 node = dot_or_id 4551 else: 4552 column.replace(dot_or_id) 4553 return node
def parse_var_map(args: t.List) -> exp.Expression:
    """Build a VarMap from alternating key/value arguments.

    A single star argument produces an ``exp.StarMap``; otherwise even-indexed
    args become keys and odd-indexed args become values. An odd-length `args`
    raises ``IndexError`` when the final key has no matching value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
57class Parser(metaclass=_Parser): 58 """ 59 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 60 a parsed syntax tree. 61 62 Args: 63 error_level: the desired error level. 64 Default: ErrorLevel.RAISE 65 error_message_context: determines the amount of context to capture from a 66 query string when displaying the error message (in number of characters). 67 Default: 50. 68 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 69 Default: 0 70 alias_post_tablesample: If the table alias comes after tablesample. 71 Default: False 72 max_errors: Maximum number of error messages to include in a raised ParseError. 73 This is only relevant if error_level is ErrorLevel.RAISE. 74 Default: 3 75 null_ordering: Indicates the default null ordering method to use if not explicitly set. 76 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 77 Default: "nulls_are_small" 78 """ 79 80 FUNCTIONS: t.Dict[str, t.Callable] = { 81 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 82 "DATE_TO_DATE_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 87 "IFNULL": exp.Coalesce.from_arg_list, 88 "LIKE": parse_like, 89 "TIME_TO_TIME_STR": lambda args: exp.Cast( 90 this=seq_get(args, 0), 91 to=exp.DataType(this=exp.DataType.Type.TEXT), 92 ), 93 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 94 this=exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 start=exp.Literal.number(1), 99 length=exp.Literal.number(10), 100 ), 101 "VAR_MAP": parse_var_map, 102 } 103 104 NO_PAREN_FUNCTIONS = { 105 TokenType.CURRENT_DATE: exp.CurrentDate, 106 TokenType.CURRENT_DATETIME: exp.CurrentDate, 107 TokenType.CURRENT_TIME: exp.CurrentTime, 108 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 109 
TokenType.CURRENT_USER: exp.CurrentUser, 110 } 111 112 JOIN_HINTS: t.Set[str] = set() 113 114 NESTED_TYPE_TOKENS = { 115 TokenType.ARRAY, 116 TokenType.MAP, 117 TokenType.NULLABLE, 118 TokenType.STRUCT, 119 } 120 121 TYPE_TOKENS = { 122 TokenType.BIT, 123 TokenType.BOOLEAN, 124 TokenType.TINYINT, 125 TokenType.UTINYINT, 126 TokenType.SMALLINT, 127 TokenType.USMALLINT, 128 TokenType.INT, 129 TokenType.UINT, 130 TokenType.BIGINT, 131 TokenType.UBIGINT, 132 TokenType.INT128, 133 TokenType.UINT128, 134 TokenType.INT256, 135 TokenType.UINT256, 136 TokenType.FLOAT, 137 TokenType.DOUBLE, 138 TokenType.CHAR, 139 TokenType.NCHAR, 140 TokenType.VARCHAR, 141 TokenType.NVARCHAR, 142 TokenType.TEXT, 143 TokenType.MEDIUMTEXT, 144 TokenType.LONGTEXT, 145 TokenType.MEDIUMBLOB, 146 TokenType.LONGBLOB, 147 TokenType.BINARY, 148 TokenType.VARBINARY, 149 TokenType.JSON, 150 TokenType.JSONB, 151 TokenType.INTERVAL, 152 TokenType.TIME, 153 TokenType.TIMESTAMP, 154 TokenType.TIMESTAMPTZ, 155 TokenType.TIMESTAMPLTZ, 156 TokenType.DATETIME, 157 TokenType.DATETIME64, 158 TokenType.DATE, 159 TokenType.DECIMAL, 160 TokenType.BIGDECIMAL, 161 TokenType.UUID, 162 TokenType.GEOGRAPHY, 163 TokenType.GEOMETRY, 164 TokenType.HLLSKETCH, 165 TokenType.HSTORE, 166 TokenType.PSEUDO_TYPE, 167 TokenType.SUPER, 168 TokenType.SERIAL, 169 TokenType.SMALLSERIAL, 170 TokenType.BIGSERIAL, 171 TokenType.XML, 172 TokenType.UNIQUEIDENTIFIER, 173 TokenType.MONEY, 174 TokenType.SMALLMONEY, 175 TokenType.ROWVERSION, 176 TokenType.IMAGE, 177 TokenType.VARIANT, 178 TokenType.OBJECT, 179 TokenType.INET, 180 *NESTED_TYPE_TOKENS, 181 } 182 183 SUBQUERY_PREDICATES = { 184 TokenType.ANY: exp.Any, 185 TokenType.ALL: exp.All, 186 TokenType.EXISTS: exp.Exists, 187 TokenType.SOME: exp.Any, 188 } 189 190 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 191 192 DB_CREATABLES = { 193 TokenType.DATABASE, 194 TokenType.SCHEMA, 195 TokenType.TABLE, 196 TokenType.VIEW, 197 } 198 199 CREATABLES = { 200 
TokenType.COLUMN, 201 TokenType.FUNCTION, 202 TokenType.INDEX, 203 TokenType.PROCEDURE, 204 *DB_CREATABLES, 205 } 206 207 ID_VAR_TOKENS = { 208 TokenType.VAR, 209 TokenType.ANTI, 210 TokenType.APPLY, 211 TokenType.ASC, 212 TokenType.AUTO_INCREMENT, 213 TokenType.BEGIN, 214 TokenType.CACHE, 215 TokenType.COLLATE, 216 TokenType.COMMAND, 217 TokenType.COMMENT, 218 TokenType.COMMIT, 219 TokenType.CONSTRAINT, 220 TokenType.DEFAULT, 221 TokenType.DELETE, 222 TokenType.DESC, 223 TokenType.DESCRIBE, 224 TokenType.DIV, 225 TokenType.END, 226 TokenType.EXECUTE, 227 TokenType.ESCAPE, 228 TokenType.FALSE, 229 TokenType.FIRST, 230 TokenType.FILTER, 231 TokenType.FORMAT, 232 TokenType.FULL, 233 TokenType.IF, 234 TokenType.IS, 235 TokenType.ISNULL, 236 TokenType.INTERVAL, 237 TokenType.KEEP, 238 TokenType.LEFT, 239 TokenType.LOAD, 240 TokenType.MERGE, 241 TokenType.NATURAL, 242 TokenType.NEXT, 243 TokenType.OFFSET, 244 TokenType.ORDINALITY, 245 TokenType.OVERWRITE, 246 TokenType.PARTITION, 247 TokenType.PERCENT, 248 TokenType.PIVOT, 249 TokenType.PRAGMA, 250 TokenType.RANGE, 251 TokenType.REFERENCES, 252 TokenType.RIGHT, 253 TokenType.ROW, 254 TokenType.ROWS, 255 TokenType.SEMI, 256 TokenType.SET, 257 TokenType.SETTINGS, 258 TokenType.SHOW, 259 TokenType.TEMPORARY, 260 TokenType.TOP, 261 TokenType.TRUE, 262 TokenType.UNIQUE, 263 TokenType.UNPIVOT, 264 TokenType.VOLATILE, 265 TokenType.WINDOW, 266 *CREATABLES, 267 *SUBQUERY_PREDICATES, 268 *TYPE_TOKENS, 269 *NO_PAREN_FUNCTIONS, 270 } 271 272 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 273 274 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 275 TokenType.APPLY, 276 TokenType.FULL, 277 TokenType.LEFT, 278 TokenType.LOCK, 279 TokenType.NATURAL, 280 TokenType.OFFSET, 281 TokenType.RIGHT, 282 TokenType.WINDOW, 283 } 284 285 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 286 287 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 288 289 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 290 291 FUNC_TOKENS = { 292 
TokenType.COMMAND, 293 TokenType.CURRENT_DATE, 294 TokenType.CURRENT_DATETIME, 295 TokenType.CURRENT_TIMESTAMP, 296 TokenType.CURRENT_TIME, 297 TokenType.CURRENT_USER, 298 TokenType.FILTER, 299 TokenType.FIRST, 300 TokenType.FORMAT, 301 TokenType.GLOB, 302 TokenType.IDENTIFIER, 303 TokenType.INDEX, 304 TokenType.ISNULL, 305 TokenType.ILIKE, 306 TokenType.LIKE, 307 TokenType.MERGE, 308 TokenType.OFFSET, 309 TokenType.PRIMARY_KEY, 310 TokenType.RANGE, 311 TokenType.REPLACE, 312 TokenType.ROW, 313 TokenType.UNNEST, 314 TokenType.VAR, 315 TokenType.LEFT, 316 TokenType.RIGHT, 317 TokenType.DATE, 318 TokenType.DATETIME, 319 TokenType.TABLE, 320 TokenType.TIMESTAMP, 321 TokenType.TIMESTAMPTZ, 322 TokenType.WINDOW, 323 *TYPE_TOKENS, 324 *SUBQUERY_PREDICATES, 325 } 326 327 CONJUNCTION = { 328 TokenType.AND: exp.And, 329 TokenType.OR: exp.Or, 330 } 331 332 EQUALITY = { 333 TokenType.EQ: exp.EQ, 334 TokenType.NEQ: exp.NEQ, 335 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 336 } 337 338 COMPARISON = { 339 TokenType.GT: exp.GT, 340 TokenType.GTE: exp.GTE, 341 TokenType.LT: exp.LT, 342 TokenType.LTE: exp.LTE, 343 } 344 345 BITWISE = { 346 TokenType.AMP: exp.BitwiseAnd, 347 TokenType.CARET: exp.BitwiseXor, 348 TokenType.PIPE: exp.BitwiseOr, 349 TokenType.DPIPE: exp.DPipe, 350 } 351 352 TERM = { 353 TokenType.DASH: exp.Sub, 354 TokenType.PLUS: exp.Add, 355 TokenType.MOD: exp.Mod, 356 TokenType.COLLATE: exp.Collate, 357 } 358 359 FACTOR = { 360 TokenType.DIV: exp.IntDiv, 361 TokenType.LR_ARROW: exp.Distance, 362 TokenType.SLASH: exp.Div, 363 TokenType.STAR: exp.Mul, 364 } 365 366 TIMESTAMPS = { 367 TokenType.TIME, 368 TokenType.TIMESTAMP, 369 TokenType.TIMESTAMPTZ, 370 TokenType.TIMESTAMPLTZ, 371 } 372 373 SET_OPERATIONS = { 374 TokenType.UNION, 375 TokenType.INTERSECT, 376 TokenType.EXCEPT, 377 } 378 379 JOIN_SIDES = { 380 TokenType.LEFT, 381 TokenType.RIGHT, 382 TokenType.FULL, 383 } 384 385 JOIN_KINDS = { 386 TokenType.INNER, 387 TokenType.OUTER, 388 TokenType.CROSS, 389 
TokenType.SEMI, 390 TokenType.ANTI, 391 } 392 393 LAMBDAS = { 394 TokenType.ARROW: lambda self, expressions: self.expression( 395 exp.Lambda, 396 this=self._replace_lambda( 397 self._parse_conjunction(), 398 {node.name for node in expressions}, 399 ), 400 expressions=expressions, 401 ), 402 TokenType.FARROW: lambda self, expressions: self.expression( 403 exp.Kwarg, 404 this=exp.Var(this=expressions[0].name), 405 expression=self._parse_conjunction(), 406 ), 407 } 408 409 COLUMN_OPERATORS = { 410 TokenType.DOT: None, 411 TokenType.DCOLON: lambda self, this, to: self.expression( 412 exp.Cast if self.STRICT_CAST else exp.TryCast, 413 this=this, 414 to=to, 415 ), 416 TokenType.ARROW: lambda self, this, path: self.expression( 417 exp.JSONExtract, 418 this=this, 419 expression=path, 420 ), 421 TokenType.DARROW: lambda self, this, path: self.expression( 422 exp.JSONExtractScalar, 423 this=this, 424 expression=path, 425 ), 426 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 427 exp.JSONBExtract, 428 this=this, 429 expression=path, 430 ), 431 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 432 exp.JSONBExtractScalar, 433 this=this, 434 expression=path, 435 ), 436 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 437 exp.JSONBContains, 438 this=this, 439 expression=key, 440 ), 441 } 442 443 EXPRESSION_PARSERS = { 444 exp.Column: lambda self: self._parse_column(), 445 exp.DataType: lambda self: self._parse_types(), 446 exp.From: lambda self: self._parse_from(), 447 exp.Group: lambda self: self._parse_group(), 448 exp.Identifier: lambda self: self._parse_id_var(), 449 exp.Lateral: lambda self: self._parse_lateral(), 450 exp.Join: lambda self: self._parse_join(), 451 exp.Order: lambda self: self._parse_order(), 452 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 453 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 454 exp.Lambda: lambda self: self._parse_lambda(), 455 exp.Limit: lambda self: 
self._parse_limit(), 456 exp.Offset: lambda self: self._parse_offset(), 457 exp.TableAlias: lambda self: self._parse_table_alias(), 458 exp.Table: lambda self: self._parse_table_parts(), 459 exp.Condition: lambda self: self._parse_conjunction(), 460 exp.Expression: lambda self: self._parse_statement(), 461 exp.Properties: lambda self: self._parse_properties(), 462 exp.Where: lambda self: self._parse_where(), 463 exp.Ordered: lambda self: self._parse_ordered(), 464 exp.Having: lambda self: self._parse_having(), 465 exp.With: lambda self: self._parse_with(), 466 exp.Window: lambda self: self._parse_named_window(), 467 exp.Qualify: lambda self: self._parse_qualify(), 468 exp.Returning: lambda self: self._parse_returning(), 469 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 470 } 471 472 STATEMENT_PARSERS = { 473 TokenType.ALTER: lambda self: self._parse_alter(), 474 TokenType.BEGIN: lambda self: self._parse_transaction(), 475 TokenType.CACHE: lambda self: self._parse_cache(), 476 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 477 TokenType.COMMENT: lambda self: self._parse_comment(), 478 TokenType.CREATE: lambda self: self._parse_create(), 479 TokenType.DELETE: lambda self: self._parse_delete(), 480 TokenType.DESC: lambda self: self._parse_describe(), 481 TokenType.DESCRIBE: lambda self: self._parse_describe(), 482 TokenType.DROP: lambda self: self._parse_drop(), 483 TokenType.END: lambda self: self._parse_commit_or_rollback(), 484 TokenType.INSERT: lambda self: self._parse_insert(), 485 TokenType.LOAD: lambda self: self._parse_load(), 486 TokenType.MERGE: lambda self: self._parse_merge(), 487 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 488 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 489 TokenType.SET: lambda self: self._parse_set(), 490 TokenType.UNCACHE: lambda self: self._parse_uncache(), 491 TokenType.UPDATE: lambda self: self._parse_update(), 492 TokenType.USE: 
lambda self: self.expression( 493 exp.Use, 494 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 495 and exp.Var(this=self._prev.text), 496 this=self._parse_table(schema=False), 497 ), 498 } 499 500 UNARY_PARSERS = { 501 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 502 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 503 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 504 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 505 } 506 507 PRIMARY_PARSERS = { 508 TokenType.STRING: lambda self, token: self.expression( 509 exp.Literal, this=token.text, is_string=True 510 ), 511 TokenType.NUMBER: lambda self, token: self.expression( 512 exp.Literal, this=token.text, is_string=False 513 ), 514 TokenType.STAR: lambda self, _: self.expression( 515 exp.Star, 516 **{"except": self._parse_except(), "replace": self._parse_replace()}, 517 ), 518 TokenType.NULL: lambda self, _: self.expression(exp.Null), 519 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 520 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 521 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 522 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 523 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 524 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 525 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 526 exp.National, this=token.text 527 ), 528 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 529 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 530 } 531 532 PLACEHOLDER_PARSERS = { 533 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 534 
    # Infix range/predicate operators (BETWEEN, IN, LIKE, ...). Each parser
    # receives the already-parsed left-hand side as `this`.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keywords (upper-cased, possibly multi-word) -> parser.
    # Parsers taking **kwargs receive modifier flags collected by
    # _parse_property_before (e.g. NO/DUAL/DEFAULT for Teradata).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
"STABLE": lambda self: self.expression( 604 exp.StabilityProperty, this=exp.Literal.string("STABLE") 605 ), 606 "STORED": lambda self: self._parse_stored(), 607 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 608 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 609 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 610 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 611 "TTL": lambda self: self._parse_ttl(), 612 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 613 "VOLATILE": lambda self: self._parse_volatile_property(), 614 "WITH": lambda self: self._parse_with_property(), 615 } 616 617 CONSTRAINT_PARSERS = { 618 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 619 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 620 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 621 "CHARACTER SET": lambda self: self.expression( 622 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 623 ), 624 "CHECK": lambda self: self.expression( 625 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 626 ), 627 "COLLATE": lambda self: self.expression( 628 exp.CollateColumnConstraint, this=self._parse_var() 629 ), 630 "COMMENT": lambda self: self.expression( 631 exp.CommentColumnConstraint, this=self._parse_string() 632 ), 633 "COMPRESS": lambda self: self._parse_compress(), 634 "DEFAULT": lambda self: self.expression( 635 exp.DefaultColumnConstraint, this=self._parse_bitwise() 636 ), 637 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 638 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 639 "FORMAT": lambda self: self.expression( 640 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 641 ), 642 "GENERATED": lambda self: self._parse_generated_as_identity(), 643 "IDENTITY": lambda self: self._parse_auto_increment(), 644 
"INLINE": lambda self: self._parse_inline(), 645 "LIKE": lambda self: self._parse_create_like(), 646 "NOT": lambda self: self._parse_not_constraint(), 647 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 648 "ON": lambda self: self._match(TokenType.UPDATE) 649 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 650 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 651 "PRIMARY KEY": lambda self: self._parse_primary_key(), 652 "REFERENCES": lambda self: self._parse_references(match=False), 653 "TITLE": lambda self: self.expression( 654 exp.TitleColumnConstraint, this=self._parse_var_or_string() 655 ), 656 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 657 "UNIQUE": lambda self: self._parse_unique(), 658 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 659 } 660 661 ALTER_PARSERS = { 662 "ADD": lambda self: self._parse_alter_table_add(), 663 "ALTER": lambda self: self._parse_alter_table_alter(), 664 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 665 "DROP": lambda self: self._parse_alter_table_drop(), 666 "RENAME": lambda self: self._parse_alter_table_rename(), 667 } 668 669 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 670 671 NO_PAREN_FUNCTION_PARSERS = { 672 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 673 TokenType.CASE: lambda self: self._parse_case(), 674 TokenType.IF: lambda self: self._parse_if(), 675 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 676 exp.NextValueFor, 677 this=self._parse_column(), 678 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 679 ), 680 } 681 682 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 683 684 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 685 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 686 "CONVERT": 
lambda self: self._parse_convert(self.STRICT_CAST), 687 "DECODE": lambda self: self._parse_decode(), 688 "EXTRACT": lambda self: self._parse_extract(), 689 "JSON_OBJECT": lambda self: self._parse_json_object(), 690 "LOG": lambda self: self._parse_logarithm(), 691 "MATCH": lambda self: self._parse_match_against(), 692 "OPENJSON": lambda self: self._parse_open_json(), 693 "POSITION": lambda self: self._parse_position(), 694 "SAFE_CAST": lambda self: self._parse_cast(False), 695 "STRING_AGG": lambda self: self._parse_string_agg(), 696 "SUBSTRING": lambda self: self._parse_substring(), 697 "TRIM": lambda self: self._parse_trim(), 698 "TRY_CAST": lambda self: self._parse_cast(False), 699 "TRY_CONVERT": lambda self: self._parse_convert(False), 700 } 701 702 QUERY_MODIFIER_PARSERS = { 703 "joins": lambda self: list(iter(self._parse_join, None)), 704 "laterals": lambda self: list(iter(self._parse_lateral, None)), 705 "match": lambda self: self._parse_match_recognize(), 706 "where": lambda self: self._parse_where(), 707 "group": lambda self: self._parse_group(), 708 "having": lambda self: self._parse_having(), 709 "qualify": lambda self: self._parse_qualify(), 710 "windows": lambda self: self._parse_window_clause(), 711 "order": lambda self: self._parse_order(), 712 "limit": lambda self: self._parse_limit(), 713 "offset": lambda self: self._parse_offset(), 714 "locks": lambda self: self._parse_locks(), 715 "sample": lambda self: self._parse_table_sample(as_modifier=True), 716 } 717 718 SET_PARSERS = { 719 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 720 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 721 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 722 "TRANSACTION": lambda self: self._parse_set_transaction(), 723 } 724 725 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 726 727 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 728 729 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 730 731 
TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 732 733 TRANSACTION_CHARACTERISTICS = { 734 "ISOLATION LEVEL REPEATABLE READ", 735 "ISOLATION LEVEL READ COMMITTED", 736 "ISOLATION LEVEL READ UNCOMMITTED", 737 "ISOLATION LEVEL SERIALIZABLE", 738 "READ WRITE", 739 "READ ONLY", 740 } 741 742 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 743 744 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 745 746 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 747 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 748 749 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 750 751 STRICT_CAST = True 752 753 CONVERT_TYPE_FIRST = False 754 755 PREFIXED_PIVOT_COLUMNS = False 756 IDENTIFY_PIVOT_STRINGS = False 757 758 LOG_BASE_FIRST = True 759 LOG_DEFAULTS_TO_LN = False 760 761 __slots__ = ( 762 "error_level", 763 "error_message_context", 764 "sql", 765 "errors", 766 "index_offset", 767 "unnest_column_only", 768 "alias_post_tablesample", 769 "max_errors", 770 "null_ordering", 771 "_tokens", 772 "_index", 773 "_curr", 774 "_next", 775 "_prev", 776 "_prev_comments", 777 "_show_trie", 778 "_set_trie", 779 ) 780 781 def __init__( 782 self, 783 error_level: t.Optional[ErrorLevel] = None, 784 error_message_context: int = 100, 785 index_offset: int = 0, 786 unnest_column_only: bool = False, 787 alias_post_tablesample: bool = False, 788 max_errors: int = 3, 789 null_ordering: t.Optional[str] = None, 790 ): 791 self.error_level = error_level or ErrorLevel.IMMEDIATE 792 self.error_message_context = error_message_context 793 self.index_offset = index_offset 794 self.unnest_column_only = unnest_column_only 795 self.alias_post_tablesample = alias_post_tablesample 796 self.max_errors = max_errors 797 self.null_ordering = null_ordering 798 self.reset() 799 800 def reset(self): 801 self.sql = "" 802 self.errors = [] 803 self._tokens = [] 804 self._index = 0 805 self._curr = None 806 self._next = None 807 self._prev = 
None 808 self._prev_comments = None 809 810 def parse( 811 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 812 ) -> t.List[t.Optional[exp.Expression]]: 813 """ 814 Parses a list of tokens and returns a list of syntax trees, one tree 815 per parsed SQL statement. 816 817 Args: 818 raw_tokens: the list of tokens. 819 sql: the original SQL string, used to produce helpful debug messages. 820 821 Returns: 822 The list of syntax trees. 823 """ 824 return self._parse( 825 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 826 ) 827 828 def parse_into( 829 self, 830 expression_types: exp.IntoType, 831 raw_tokens: t.List[Token], 832 sql: t.Optional[str] = None, 833 ) -> t.List[t.Optional[exp.Expression]]: 834 """ 835 Parses a list of tokens into a given Expression type. If a collection of Expression 836 types is given instead, this method will try to parse the token list into each one 837 of them, stopping at the first for which the parsing succeeds. 838 839 Args: 840 expression_types: the expression type(s) to try and parse the token list into. 841 raw_tokens: the list of tokens. 842 sql: the original SQL string, used to produce helpful debug messages. 843 844 Returns: 845 The target Expression. 
846 """ 847 errors = [] 848 for expression_type in ensure_collection(expression_types): 849 parser = self.EXPRESSION_PARSERS.get(expression_type) 850 if not parser: 851 raise TypeError(f"No parser registered for {expression_type}") 852 try: 853 return self._parse(parser, raw_tokens, sql) 854 except ParseError as e: 855 e.errors[0]["into_expression"] = expression_type 856 errors.append(e) 857 raise ParseError( 858 f"Failed to parse into {expression_types}", 859 errors=merge_errors(errors), 860 ) from errors[-1] 861 862 def _parse( 863 self, 864 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 865 raw_tokens: t.List[Token], 866 sql: t.Optional[str] = None, 867 ) -> t.List[t.Optional[exp.Expression]]: 868 self.reset() 869 self.sql = sql or "" 870 total = len(raw_tokens) 871 chunks: t.List[t.List[Token]] = [[]] 872 873 for i, token in enumerate(raw_tokens): 874 if token.token_type == TokenType.SEMICOLON: 875 if i < total - 1: 876 chunks.append([]) 877 else: 878 chunks[-1].append(token) 879 880 expressions = [] 881 882 for tokens in chunks: 883 self._index = -1 884 self._tokens = tokens 885 self._advance() 886 887 expressions.append(parse_method(self)) 888 889 if self._index < len(self._tokens): 890 self.raise_error("Invalid expression / Unexpected token") 891 892 self.check_errors() 893 894 return expressions 895 896 def check_errors(self) -> None: 897 """ 898 Logs or raises any found errors, depending on the chosen error level setting. 899 """ 900 if self.error_level == ErrorLevel.WARN: 901 for error in self.errors: 902 logger.error(str(error)) 903 elif self.error_level == ErrorLevel.RAISE and self.errors: 904 raise ParseError( 905 concat_messages(self.errors, self.max_errors), 906 errors=merge_errors(self.errors), 907 ) 908 909 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 910 """ 911 Appends an error in the list of recorded errors or raises it, depending on the chosen 912 error level setting. 
913 """ 914 token = token or self._curr or self._prev or Token.string("") 915 start = token.start 916 end = token.end + 1 917 start_context = self.sql[max(start - self.error_message_context, 0) : start] 918 highlight = self.sql[start:end] 919 end_context = self.sql[end : end + self.error_message_context] 920 921 error = ParseError.new( 922 f"{message}. Line {token.line}, Col: {token.col}.\n" 923 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 924 description=message, 925 line=token.line, 926 col=token.col, 927 start_context=start_context, 928 highlight=highlight, 929 end_context=end_context, 930 ) 931 932 if self.error_level == ErrorLevel.IMMEDIATE: 933 raise error 934 935 self.errors.append(error) 936 937 def expression( 938 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 939 ) -> E: 940 """ 941 Creates a new, validated Expression. 942 943 Args: 944 exp_class: the expression class to instantiate. 945 comments: an optional list of comments to attach to the expression. 946 kwargs: the arguments to set for the expression along with their respective values. 947 948 Returns: 949 The target expression. 950 """ 951 instance = exp_class(**kwargs) 952 instance.add_comments(comments) if comments else self._add_comments(instance) 953 self.validate_expression(instance) 954 return instance 955 956 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 957 if expression and self._prev_comments: 958 expression.add_comments(self._prev_comments) 959 self._prev_comments = None 960 961 def validate_expression( 962 self, expression: exp.Expression, args: t.Optional[t.List] = None 963 ) -> None: 964 """ 965 Validates an already instantiated expression, making sure that all its mandatory arguments 966 are set. 967 968 Args: 969 expression: the expression to validate. 970 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
971 """ 972 if self.error_level == ErrorLevel.IGNORE: 973 return 974 975 for error_message in expression.error_messages(args): 976 self.raise_error(error_message) 977 978 def _find_sql(self, start: Token, end: Token) -> str: 979 return self.sql[start.start : end.end + 1] 980 981 def _advance(self, times: int = 1) -> None: 982 self._index += times 983 self._curr = seq_get(self._tokens, self._index) 984 self._next = seq_get(self._tokens, self._index + 1) 985 if self._index > 0: 986 self._prev = self._tokens[self._index - 1] 987 self._prev_comments = self._prev.comments 988 else: 989 self._prev = None 990 self._prev_comments = None 991 992 def _retreat(self, index: int) -> None: 993 if index != self._index: 994 self._advance(index - self._index) 995 996 def _parse_command(self) -> exp.Command: 997 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 998 999 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1000 start = self._prev 1001 exists = self._parse_exists() if allow_exists else None 1002 1003 self._match(TokenType.ON) 1004 1005 kind = self._match_set(self.CREATABLES) and self._prev 1006 1007 if not kind: 1008 return self._parse_as_command(start) 1009 1010 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1011 this = self._parse_user_defined_function(kind=kind.token_type) 1012 elif kind.token_type == TokenType.TABLE: 1013 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1014 elif kind.token_type == TokenType.COLUMN: 1015 this = self._parse_column() 1016 else: 1017 this = self._parse_id_var() 1018 1019 self._match(TokenType.IS) 1020 1021 return self.expression( 1022 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1023 ) 1024 1025 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1026 def _parse_ttl(self) -> exp.Expression: 1027 def _parse_ttl_action() -> 
t.Optional[exp.Expression]: 1028 this = self._parse_bitwise() 1029 1030 if self._match_text_seq("DELETE"): 1031 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1032 if self._match_text_seq("RECOMPRESS"): 1033 return self.expression( 1034 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1035 ) 1036 if self._match_text_seq("TO", "DISK"): 1037 return self.expression( 1038 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1039 ) 1040 if self._match_text_seq("TO", "VOLUME"): 1041 return self.expression( 1042 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1043 ) 1044 1045 return this 1046 1047 expressions = self._parse_csv(_parse_ttl_action) 1048 where = self._parse_where() 1049 group = self._parse_group() 1050 1051 aggregates = None 1052 if group and self._match(TokenType.SET): 1053 aggregates = self._parse_csv(self._parse_set_item) 1054 1055 return self.expression( 1056 exp.MergeTreeTTL, 1057 expressions=expressions, 1058 where=where, 1059 group=group, 1060 aggregates=aggregates, 1061 ) 1062 1063 def _parse_statement(self) -> t.Optional[exp.Expression]: 1064 if self._curr is None: 1065 return None 1066 1067 if self._match_set(self.STATEMENT_PARSERS): 1068 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1069 1070 if self._match_set(Tokenizer.COMMANDS): 1071 return self._parse_command() 1072 1073 expression = self._parse_expression() 1074 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1075 return self._parse_query_modifiers(expression) 1076 1077 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1078 start = self._prev 1079 temporary = self._match(TokenType.TEMPORARY) 1080 materialized = self._match_text_seq("MATERIALIZED") 1081 kind = self._match_set(self.CREATABLES) and self._prev.text 1082 if not kind: 1083 return self._parse_as_command(start) 1084 1085 return self.expression( 1086 exp.Drop, 1087 exists=self._parse_exists(), 1088 
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consume IF [NOT] EXISTS; True when fully matched, falsy otherwise."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parse a CREATE statement (table, view, index, function/procedure, ...).

        Properties may legally appear in several positions (the
        exp.Properties.Location comments below); each batch found is merged
        into the single `properties` accumulator.
        """
        start = self._prev
        # CREATE OR REPLACE; _prev is "REPLACE" when REPLACE itself started the statement.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: consume TABLE so FUNCTION becomes the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect leading modifier keywords; only truthy ones are forwarded to
        # the property parser below.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser does not accept the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property; None when the cursor is not at one."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property (peeked without consuming).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
self._match_texts(self.PROPERTY_PARSERS): 1262 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1263 1264 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1265 return self._parse_character_set(default=True) 1266 1267 if self._match_text_seq("COMPOUND", "SORTKEY"): 1268 return self._parse_sortkey(compound=True) 1269 1270 if self._match_text_seq("SQL", "SECURITY"): 1271 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1272 1273 assignment = self._match_pair( 1274 TokenType.VAR, TokenType.EQ, advance=False 1275 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1276 1277 if assignment: 1278 key = self._parse_var_or_string() 1279 self._match(TokenType.EQ) 1280 return self.expression(exp.Property, this=key, value=self._parse_column()) 1281 1282 return None 1283 1284 def _parse_stored(self) -> exp.Expression: 1285 self._match(TokenType.ALIAS) 1286 1287 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1288 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1289 1290 return self.expression( 1291 exp.FileFormatProperty, 1292 this=self.expression( 1293 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1294 ) 1295 if input_format or output_format 1296 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1297 ) 1298 1299 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1300 self._match(TokenType.EQ) 1301 self._match(TokenType.ALIAS) 1302 return self.expression(exp_class, this=self._parse_field()) 1303 1304 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1305 properties = [] 1306 1307 while True: 1308 if before: 1309 prop = self._parse_property_before() 1310 else: 1311 prop = self._parse_property() 1312 1313 if not prop: 1314 break 1315 for p in ensure_list(prop): 1316 
properties.append(p) 1317 1318 if properties: 1319 return self.expression(exp.Properties, expressions=properties) 1320 1321 return None 1322 1323 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1324 return self.expression( 1325 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1326 ) 1327 1328 def _parse_volatile_property(self) -> exp.Expression: 1329 if self._index >= 2: 1330 pre_volatile_token = self._tokens[self._index - 2] 1331 else: 1332 pre_volatile_token = None 1333 1334 if pre_volatile_token and pre_volatile_token.token_type in ( 1335 TokenType.CREATE, 1336 TokenType.REPLACE, 1337 TokenType.UNIQUE, 1338 ): 1339 return exp.VolatileProperty() 1340 1341 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1342 1343 def _parse_with_property( 1344 self, 1345 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1346 self._match(TokenType.WITH) 1347 if self._match(TokenType.L_PAREN, advance=False): 1348 return self._parse_wrapped_csv(self._parse_property) 1349 1350 if self._match_text_seq("JOURNAL"): 1351 return self._parse_withjournaltable() 1352 1353 if self._match_text_seq("DATA"): 1354 return self._parse_withdata(no=False) 1355 elif self._match_text_seq("NO", "DATA"): 1356 return self._parse_withdata(no=True) 1357 1358 if not self._next: 1359 return None 1360 1361 return self._parse_withisolatedloading() 1362 1363 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1364 def _parse_definer(self) -> t.Optional[exp.Expression]: 1365 self._match(TokenType.EQ) 1366 1367 user = self._parse_id_var() 1368 self._match(TokenType.PARAMETER) 1369 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1370 1371 if not user or not host: 1372 return None 1373 1374 return exp.DefinerProperty(this=f"{user}@{host}") 1375 1376 def _parse_withjournaltable(self) -> exp.Expression: 1377 self._match(TokenType.TABLE) 1378 self._match(TokenType.EQ) 1379 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1380 1381 def _parse_log(self, no: bool = False) -> exp.Expression: 1382 return self.expression(exp.LogProperty, no=no) 1383 1384 def _parse_journal(self, **kwargs) -> exp.Expression: 1385 return self.expression(exp.JournalProperty, **kwargs) 1386 1387 def _parse_checksum(self) -> exp.Expression: 1388 self._match(TokenType.EQ) 1389 1390 on = None 1391 if self._match(TokenType.ON): 1392 on = True 1393 elif self._match_text_seq("OFF"): 1394 on = False 1395 default = self._match(TokenType.DEFAULT) 1396 1397 return self.expression( 1398 exp.ChecksumProperty, 1399 on=on, 1400 default=default, 1401 ) 1402 1403 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1404 if not self._match_text_seq("BY"): 1405 self._retreat(self._index - 1) 1406 return None 1407 return self.expression( 1408 exp.Cluster, 1409 expressions=self._parse_csv(self._parse_ordered), 1410 ) 1411 1412 def _parse_freespace(self) -> exp.Expression: 1413 self._match(TokenType.EQ) 1414 return self.expression( 1415 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1416 ) 1417 1418 def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1419 if self._match(TokenType.EQ): 1420 return self.expression( 1421 exp.MergeBlockRatioProperty, 1422 this=self._parse_number(), 1423 percent=self._match(TokenType.PERCENT), 1424 ) 1425 return self.expression( 1426 exp.MergeBlockRatioProperty, 1427 no=no, 1428 default=default, 1429 ) 1430 1431 def _parse_datablocksize( 1432 self, 1433 default: t.Optional[bool] = None, 1434 minimum: t.Optional[bool] = None, 1435 maximum: t.Optional[bool] = None, 1436 ) -> exp.Expression: 1437 self._match(TokenType.EQ) 1438 size = self._parse_number() 1439 units = None 1440 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1441 units = self._prev.text 1442 return self.expression( 1443 exp.DataBlocksizeProperty, 1444 size=size, 1445 
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING property: object kind, target, FOR/IN, lock type."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target name; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <exprs>; empty list when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse PARTITIONED BY, as a column schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.Expression:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.Property]:
        """Parse properties prefixed with NO; only NO PRIMARY INDEX is handled."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Property]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.Expression:
        """Parse DISTKEY(<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]* in CREATE TABLE."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # An INCLUDING/EXCLUDING keyword without an option name is invalid.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse [COMPOUND] SORTKEY(<identifiers>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: RETURNS TABLE <...> / TABLE(...) / <type>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        """Parse DESCRIBE [<creatable kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (table or DIRECTORY target)."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR <alternative> (e.g. sqlite-style conflict handling).
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... DO NOTHING|UPDATE or ON DUPLICATE KEY."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <columns> clause."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse ROW FORMAT (the ROW token was already consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive-style ROW FORMAT SERDE|DELIMITED clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH ...; anything else becomes a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1823 # Source: https://prestodb.io/docs/current/sql/values.html 1824 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1825 1826 def _parse_select( 1827 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1828 ) -> t.Optional[exp.Expression]: 1829 cte = self._parse_with() 1830 if cte: 1831 this = self._parse_statement() 1832 1833 if not this: 1834 self.raise_error("Failed to parse any statement following CTE") 1835 return cte 1836 1837 if "with" in this.arg_types: 1838 this.set("with", cte) 1839 else: 1840 self.raise_error(f"{this.key} does not support CTE") 1841 this = cte 1842 elif self._match(TokenType.SELECT): 1843 comments = self._prev_comments 1844 1845 hint = self._parse_hint() 1846 all_ = self._match(TokenType.ALL) 1847 distinct = self._match(TokenType.DISTINCT) 1848 1849 kind = ( 1850 self._match(TokenType.ALIAS) 1851 and self._match_texts(("STRUCT", "VALUE")) 1852 and self._prev.text 1853 ) 1854 1855 if distinct: 1856 distinct = self.expression( 1857 exp.Distinct, 1858 on=self._parse_value() if self._match(TokenType.ON) else None, 1859 ) 1860 1861 if all_ and distinct: 1862 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1863 1864 limit = self._parse_limit(top=True) 1865 expressions = self._parse_csv(self._parse_expression) 1866 1867 this = self.expression( 1868 exp.Select, 1869 kind=kind, 1870 hint=hint, 1871 distinct=distinct, 1872 expressions=expressions, 1873 limit=limit, 1874 ) 1875 this.comments = comments 1876 1877 into = self._parse_into() 1878 if into: 1879 this.set("into", into) 1880 1881 from_ = self._parse_from() 1882 if from_: 1883 this.set("from", from_) 1884 1885 this = self._parse_query_modifiers(this) 1886 elif (table or nested) and self._match(TokenType.L_PAREN): 1887 this = self._parse_table() if table else self._parse_select(nested=True) 1888 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1889 self._match_r_paren() 1890 1891 # early 
return so that subquery unions aren't parsed again 1892 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1893 # Union ALL should be a property of the top select node, not the subquery 1894 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1895 elif self._match(TokenType.VALUES): 1896 this = self.expression( 1897 exp.Values, 1898 expressions=self._parse_csv(self._parse_value), 1899 alias=self._parse_table_alias(), 1900 ) 1901 else: 1902 this = None 1903 1904 return self._parse_set_operations(this) 1905 1906 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1907 if not skip_with_token and not self._match(TokenType.WITH): 1908 return None 1909 1910 comments = self._prev_comments 1911 recursive = self._match(TokenType.RECURSIVE) 1912 1913 expressions = [] 1914 while True: 1915 expressions.append(self._parse_cte()) 1916 1917 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1918 break 1919 else: 1920 self._match(TokenType.WITH) 1921 1922 return self.expression( 1923 exp.With, comments=comments, expressions=expressions, recursive=recursive 1924 ) 1925 1926 def _parse_cte(self) -> exp.Expression: 1927 alias = self._parse_table_alias() 1928 if not alias or not alias.this: 1929 self.raise_error("Expected CTE to have alias") 1930 1931 self._match(TokenType.ALIAS) 1932 1933 return self.expression( 1934 exp.CTE, 1935 this=self._parse_wrapped(self._parse_statement), 1936 alias=alias, 1937 ) 1938 1939 def _parse_table_alias( 1940 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1941 ) -> t.Optional[exp.Expression]: 1942 any_token = self._match(TokenType.ALIAS) 1943 alias = ( 1944 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1945 or self._parse_string_as_identifier() 1946 ) 1947 1948 index = self._index 1949 if self._match(TokenType.L_PAREN): 1950 columns = self._parse_csv(self._parse_function_parameter) 1951 self._match_r_paren() if columns else 
self._retreat(index) 1952 else: 1953 columns = None 1954 1955 if not alias and not columns: 1956 return None 1957 1958 return self.expression(exp.TableAlias, this=alias, columns=columns) 1959 1960 def _parse_subquery( 1961 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1962 ) -> exp.Expression: 1963 return self.expression( 1964 exp.Subquery, 1965 this=this, 1966 pivots=self._parse_pivots(), 1967 alias=self._parse_table_alias() if parse_alias else None, 1968 ) 1969 1970 def _parse_query_modifiers( 1971 self, this: t.Optional[exp.Expression] 1972 ) -> t.Optional[exp.Expression]: 1973 if isinstance(this, self.MODIFIABLES): 1974 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1975 expression = parser(self) 1976 1977 if expression: 1978 this.set(key, expression) 1979 return this 1980 1981 def _parse_hint(self) -> t.Optional[exp.Expression]: 1982 if self._match(TokenType.HINT): 1983 hints = self._parse_csv(self._parse_function) 1984 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1985 self.raise_error("Expected */ after HINT") 1986 return self.expression(exp.Hint, expressions=hints) 1987 1988 return None 1989 1990 def _parse_into(self) -> t.Optional[exp.Expression]: 1991 if not self._match(TokenType.INTO): 1992 return None 1993 1994 temp = self._match(TokenType.TEMPORARY) 1995 unlogged = self._match_text_seq("UNLOGGED") 1996 self._match(TokenType.TABLE) 1997 1998 return self.expression( 1999 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2000 ) 2001 2002 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 2003 if not self._match(TokenType.FROM): 2004 return None 2005 2006 comments = self._prev_comments 2007 this = self._parse_table() 2008 2009 return self.expression( 2010 exp.From, 2011 comments=comments, 2012 this=self._parse_query_modifiers(this) if modifiers else this, 2013 ) 2014 2015 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2016 if not 
self._match(TokenType.MATCH_RECOGNIZE): 2017 return None 2018 2019 self._match_l_paren() 2020 2021 partition = self._parse_partition_by() 2022 order = self._parse_order() 2023 measures = ( 2024 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2025 ) 2026 2027 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2028 rows = exp.Var(this="ONE ROW PER MATCH") 2029 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2030 text = "ALL ROWS PER MATCH" 2031 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2032 text += f" SHOW EMPTY MATCHES" 2033 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2034 text += f" OMIT EMPTY MATCHES" 2035 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2036 text += f" WITH UNMATCHED ROWS" 2037 rows = exp.Var(this=text) 2038 else: 2039 rows = None 2040 2041 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2042 text = "AFTER MATCH SKIP" 2043 if self._match_text_seq("PAST", "LAST", "ROW"): 2044 text += f" PAST LAST ROW" 2045 elif self._match_text_seq("TO", "NEXT", "ROW"): 2046 text += f" TO NEXT ROW" 2047 elif self._match_text_seq("TO", "FIRST"): 2048 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2049 elif self._match_text_seq("TO", "LAST"): 2050 text += f" TO LAST {self._advance_any().text}" # type: ignore 2051 after = exp.Var(this=text) 2052 else: 2053 after = None 2054 2055 if self._match_text_seq("PATTERN"): 2056 self._match_l_paren() 2057 2058 if not self._curr: 2059 self.raise_error("Expecting )", self._curr) 2060 2061 paren = 1 2062 start = self._curr 2063 2064 while self._curr and paren > 0: 2065 if self._curr.token_type == TokenType.L_PAREN: 2066 paren += 1 2067 if self._curr.token_type == TokenType.R_PAREN: 2068 paren -= 1 2069 end = self._prev 2070 self._advance() 2071 if paren > 0: 2072 self.raise_error("Expecting )", self._curr) 2073 pattern = exp.Var(this=self._find_sql(start, end)) 2074 else: 2075 pattern = None 2076 2077 define = ( 2078 
self._parse_csv( 2079 lambda: self.expression( 2080 exp.Alias, 2081 alias=self._parse_id_var(any_token=True), 2082 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2083 ) 2084 ) 2085 if self._match_text_seq("DEFINE") 2086 else None 2087 ) 2088 2089 self._match_r_paren() 2090 2091 return self.expression( 2092 exp.MatchRecognize, 2093 partition_by=partition, 2094 order=order, 2095 measures=measures, 2096 rows=rows, 2097 after=after, 2098 pattern=pattern, 2099 define=define, 2100 alias=self._parse_table_alias(), 2101 ) 2102 2103 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2104 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2105 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2106 2107 if outer_apply or cross_apply: 2108 this = self._parse_select(table=True) 2109 view = None 2110 outer = not cross_apply 2111 elif self._match(TokenType.LATERAL): 2112 this = self._parse_select(table=True) 2113 view = self._match(TokenType.VIEW) 2114 outer = self._match(TokenType.OUTER) 2115 else: 2116 return None 2117 2118 if not this: 2119 this = self._parse_function() or self._parse_id_var(any_token=False) 2120 while self._match(TokenType.DOT): 2121 this = exp.Dot( 2122 this=this, 2123 expression=self._parse_function() or self._parse_id_var(any_token=False), 2124 ) 2125 2126 table_alias: t.Optional[exp.Expression] 2127 2128 if view: 2129 table = self._parse_id_var(any_token=False) 2130 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2131 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2132 else: 2133 table_alias = self._parse_table_alias() 2134 2135 expression = self.expression( 2136 exp.Lateral, 2137 this=this, 2138 view=view, 2139 outer=outer, 2140 alias=table_alias, 2141 ) 2142 2143 return expression 2144 2145 def _parse_join_side_and_kind( 2146 self, 2147 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2148 return ( 2149 
self._match(TokenType.NATURAL) and self._prev, 2150 self._match_set(self.JOIN_SIDES) and self._prev, 2151 self._match_set(self.JOIN_KINDS) and self._prev, 2152 ) 2153 2154 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2155 if self._match(TokenType.COMMA): 2156 return self.expression(exp.Join, this=self._parse_table()) 2157 2158 index = self._index 2159 natural, side, kind = self._parse_join_side_and_kind() 2160 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2161 join = self._match(TokenType.JOIN) 2162 2163 if not skip_join_token and not join: 2164 self._retreat(index) 2165 kind = None 2166 natural = None 2167 side = None 2168 2169 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2170 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2171 2172 if not skip_join_token and not join and not outer_apply and not cross_apply: 2173 return None 2174 2175 if outer_apply: 2176 side = Token(TokenType.LEFT, "LEFT") 2177 2178 kwargs: t.Dict[ 2179 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2180 ] = {"this": self._parse_table()} 2181 2182 if natural: 2183 kwargs["natural"] = True 2184 if side: 2185 kwargs["side"] = side.text 2186 if kind: 2187 kwargs["kind"] = kind.text 2188 if hint: 2189 kwargs["hint"] = hint 2190 2191 if self._match(TokenType.ON): 2192 kwargs["on"] = self._parse_conjunction() 2193 elif self._match(TokenType.USING): 2194 kwargs["using"] = self._parse_wrapped_id_vars() 2195 2196 return self.expression(exp.Join, **kwargs) # type: ignore 2197 2198 def _parse_index( 2199 self, 2200 index: t.Optional[exp.Expression] = None, 2201 ) -> t.Optional[exp.Expression]: 2202 if index: 2203 unique = None 2204 primary = None 2205 amp = None 2206 2207 self._match(TokenType.ON) 2208 self._match(TokenType.TABLE) # hive 2209 table = self._parse_table_parts(schema=True) 2210 else: 2211 unique = self._match(TokenType.UNIQUE) 2212 primary = 
self._match_text_seq("PRIMARY") 2213 amp = self._match_text_seq("AMP") 2214 if not self._match(TokenType.INDEX): 2215 return None 2216 index = self._parse_id_var() 2217 table = None 2218 2219 if self._match(TokenType.L_PAREN, advance=False): 2220 columns = self._parse_wrapped_csv(self._parse_ordered) 2221 else: 2222 columns = None 2223 2224 return self.expression( 2225 exp.Index, 2226 this=index, 2227 table=table, 2228 columns=columns, 2229 unique=unique, 2230 primary=primary, 2231 amp=amp, 2232 ) 2233 2234 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2235 return ( 2236 (not schema and self._parse_function()) 2237 or self._parse_id_var(any_token=False) 2238 or self._parse_string_as_identifier() 2239 or self._parse_placeholder() 2240 ) 2241 2242 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2243 catalog = None 2244 db = None 2245 table = self._parse_table_part(schema=schema) 2246 2247 while self._match(TokenType.DOT): 2248 if catalog: 2249 # This allows nesting the table in arbitrarily many dot expressions if needed 2250 table = self.expression( 2251 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2252 ) 2253 else: 2254 catalog = db 2255 db = table 2256 table = self._parse_table_part(schema=schema) 2257 2258 if not table: 2259 self.raise_error(f"Expected table name but got {self._curr}") 2260 2261 return self.expression( 2262 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2263 ) 2264 2265 def _parse_table( 2266 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2267 ) -> t.Optional[exp.Expression]: 2268 lateral = self._parse_lateral() 2269 if lateral: 2270 return lateral 2271 2272 unnest = self._parse_unnest() 2273 if unnest: 2274 return unnest 2275 2276 values = self._parse_derived_table_values() 2277 if values: 2278 return values 2279 2280 subquery = self._parse_select(table=True) 2281 if subquery: 2282 if not 
subquery.args.get("pivots"): 2283 subquery.set("pivots", self._parse_pivots()) 2284 return subquery 2285 2286 this: exp.Expression = self._parse_table_parts(schema=schema) 2287 2288 if schema: 2289 return self._parse_schema(this=this) 2290 2291 if self.alias_post_tablesample: 2292 table_sample = self._parse_table_sample() 2293 2294 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2295 if alias: 2296 this.set("alias", alias) 2297 2298 if not this.args.get("pivots"): 2299 this.set("pivots", self._parse_pivots()) 2300 2301 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2302 this.set( 2303 "hints", 2304 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2305 ) 2306 self._match_r_paren() 2307 2308 if not self.alias_post_tablesample: 2309 table_sample = self._parse_table_sample() 2310 2311 if table_sample: 2312 table_sample.set("this", this) 2313 this = table_sample 2314 2315 return this 2316 2317 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2318 if not self._match(TokenType.UNNEST): 2319 return None 2320 2321 expressions = self._parse_wrapped_csv(self._parse_type) 2322 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2323 alias = self._parse_table_alias() 2324 2325 if alias and self.unnest_column_only: 2326 if alias.args.get("columns"): 2327 self.raise_error("Unexpected extra column alias in unnest.") 2328 alias.set("columns", [alias.this]) 2329 alias.set("this", None) 2330 2331 offset = None 2332 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2333 self._match(TokenType.ALIAS) 2334 offset = self._parse_id_var() or exp.Identifier(this="offset") 2335 2336 return self.expression( 2337 exp.Unnest, 2338 expressions=expressions, 2339 ordinality=ordinality, 2340 alias=alias, 2341 offset=offset, 2342 ) 2343 2344 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2345 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2346 if not 
is_derived and not self._match(TokenType.VALUES): 2347 return None 2348 2349 expressions = self._parse_csv(self._parse_value) 2350 2351 if is_derived: 2352 self._match_r_paren() 2353 2354 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2355 2356 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2357 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2358 as_modifier and self._match_text_seq("USING", "SAMPLE") 2359 ): 2360 return None 2361 2362 bucket_numerator = None 2363 bucket_denominator = None 2364 bucket_field = None 2365 percent = None 2366 rows = None 2367 size = None 2368 seed = None 2369 2370 kind = ( 2371 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2372 ) 2373 method = self._parse_var(tokens=(TokenType.ROW,)) 2374 2375 self._match(TokenType.L_PAREN) 2376 2377 num = self._parse_number() 2378 2379 if self._match_text_seq("BUCKET"): 2380 bucket_numerator = self._parse_number() 2381 self._match_text_seq("OUT", "OF") 2382 bucket_denominator = bucket_denominator = self._parse_number() 2383 self._match(TokenType.ON) 2384 bucket_field = self._parse_field() 2385 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2386 percent = num 2387 elif self._match(TokenType.ROWS): 2388 rows = num 2389 else: 2390 size = num 2391 2392 self._match(TokenType.R_PAREN) 2393 2394 if self._match(TokenType.L_PAREN): 2395 method = self._parse_var() 2396 seed = self._match(TokenType.COMMA) and self._parse_number() 2397 self._match_r_paren() 2398 elif self._match_texts(("SEED", "REPEATABLE")): 2399 seed = self._parse_wrapped(self._parse_number) 2400 2401 return self.expression( 2402 exp.TableSample, 2403 method=method, 2404 bucket_numerator=bucket_numerator, 2405 bucket_denominator=bucket_denominator, 2406 bucket_field=bucket_field, 2407 percent=percent, 2408 rows=rows, 2409 size=size, 2410 seed=seed, 2411 kind=kind, 2412 ) 2413 2414 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2415 return list(iter(self._parse_pivot, None)) 2416 2417 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2418 index = self._index 2419 2420 if self._match(TokenType.PIVOT): 2421 unpivot = False 2422 elif self._match(TokenType.UNPIVOT): 2423 unpivot = True 2424 else: 2425 return None 2426 2427 expressions = [] 2428 field = None 2429 2430 if not self._match(TokenType.L_PAREN): 2431 self._retreat(index) 2432 return None 2433 2434 if unpivot: 2435 expressions = self._parse_csv(self._parse_column) 2436 else: 2437 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2438 2439 if not expressions: 2440 self.raise_error("Failed to parse PIVOT's aggregation list") 2441 2442 if not self._match(TokenType.FOR): 2443 self.raise_error("Expecting FOR") 2444 2445 value = self._parse_column() 2446 2447 if not self._match(TokenType.IN): 2448 self.raise_error("Expecting IN") 2449 2450 field = self._parse_in(value, alias=True) 2451 2452 self._match_r_paren() 2453 2454 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2455 2456 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2457 pivot.set("alias", self._parse_table_alias()) 2458 2459 if not unpivot: 2460 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2461 2462 columns: t.List[exp.Expression] = [] 2463 for fld in pivot.args["field"].expressions: 2464 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2465 for name in names: 2466 if self.PREFIXED_PIVOT_COLUMNS: 2467 name = f"{name}_{field_name}" if name else field_name 2468 else: 2469 name = f"{field_name}_{name}" if name else field_name 2470 2471 columns.append(exp.to_identifier(name)) 2472 2473 pivot.set("columns", columns) 2474 2475 return pivot 2476 2477 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2478 return [agg.alias for agg 
in aggregations] 2479 2480 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2481 if not skip_where_token and not self._match(TokenType.WHERE): 2482 return None 2483 2484 return self.expression( 2485 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2486 ) 2487 2488 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2489 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2490 return None 2491 2492 elements = defaultdict(list) 2493 2494 while True: 2495 expressions = self._parse_csv(self._parse_conjunction) 2496 if expressions: 2497 elements["expressions"].extend(expressions) 2498 2499 grouping_sets = self._parse_grouping_sets() 2500 if grouping_sets: 2501 elements["grouping_sets"].extend(grouping_sets) 2502 2503 rollup = None 2504 cube = None 2505 totals = None 2506 2507 with_ = self._match(TokenType.WITH) 2508 if self._match(TokenType.ROLLUP): 2509 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2510 elements["rollup"].extend(ensure_list(rollup)) 2511 2512 if self._match(TokenType.CUBE): 2513 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2514 elements["cube"].extend(ensure_list(cube)) 2515 2516 if self._match_text_seq("TOTALS"): 2517 totals = True 2518 elements["totals"] = True # type: ignore 2519 2520 if not (grouping_sets or rollup or cube or totals): 2521 break 2522 2523 return self.expression(exp.Group, **elements) # type: ignore 2524 2525 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2526 if not self._match(TokenType.GROUPING_SETS): 2527 return None 2528 2529 return self._parse_wrapped_csv(self._parse_grouping_set) 2530 2531 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2532 if self._match(TokenType.L_PAREN): 2533 grouping_set = self._parse_csv(self._parse_column) 2534 self._match_r_paren() 2535 return self.expression(exp.Tuple, expressions=grouping_set) 2536 2537 return 
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse an optional HAVING clause into an `exp.Having` node."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse an optional QUALIFY clause into an `exp.Qualify` node."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional ORDER BY clause; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, exp_class: t.Type[exp.Expression], *texts: str
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-like clause introduced by the given keyword sequence
        (e.g. SORT BY / CLUSTER BY / DISTRIBUTE BY) into `exp_class`."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item with optional ASC/DESC and NULLS FIRST/LAST.

        When null ordering is not explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so generated SQL is deterministic.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer NULLS FIRST only when the user did not say otherwise and the
        # dialect's default null ordering implies it for this sort direction.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, or a FETCH
        FIRST/NEXT ... ROW(S) ONLY|WITH TIES clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may be parenthesized, e.g. T-SQL's TOP (10)
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive per the SQL standard
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (or the `LIMIT x, y` comma form) into `exp.Offset`."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE), each with optional OF <tables> and a wait policy
        (NOWAIT / WAIT <n> / SKIP LOCKED), into a list of `exp.Lock` nodes.
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True = NOWAIT, an expression = WAIT <n>,
            # False = SKIP LOCKED, None = unspecified
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing set operations (UNION / EXCEPT / INTERSECT) after `this`.

        Recurses on the right-hand side so chained set operations nest to the
        right. DISTINCT is the default unless ALL is given.
        """
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction followed by an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL
        shorthands, and a trailing IS clause, with optional NOT negation.
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, or NULL/TRUE/FALSE.

        Retreats and returns None when nothing valid follows IS, so the caller
        can leave the token stream untouched.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        # IS [NOT] DISTINCT FROM maps onto null-safe (in)equality
        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse the right-hand side of an IN predicate.

        Supports IN UNNEST(...), IN (<subquery>), IN (<expr>, ...), and the
        bare `IN field` form; the result is always an `exp.In` node.
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery is stored under "query", not "expressions"
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `BETWEEN low AND high` into an `exp.Between` node."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
this=this, low=low, high=high) 2764 2765 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2766 if not self._match(TokenType.ESCAPE): 2767 return this 2768 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2769 2770 def _parse_interval(self) -> t.Optional[exp.Expression]: 2771 if not self._match(TokenType.INTERVAL): 2772 return None 2773 2774 this = self._parse_primary() or self._parse_term() 2775 unit = self._parse_function() or self._parse_var() 2776 2777 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2778 # each INTERVAL expression into this canonical form so it's easy to transpile 2779 if this and isinstance(this, exp.Literal): 2780 if this.is_number: 2781 this = exp.Literal.string(this.name) 2782 2783 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2784 parts = this.name.split() 2785 if not unit and len(parts) <= 2: 2786 this = exp.Literal.string(seq_get(parts, 0)) 2787 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2788 2789 return self.expression(exp.Interval, this=this, unit=unit) 2790 2791 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2792 this = self._parse_term() 2793 2794 while True: 2795 if self._match_set(self.BITWISE): 2796 this = self.expression( 2797 self.BITWISE[self._prev.token_type], 2798 this=this, 2799 expression=self._parse_term(), 2800 ) 2801 elif self._match_pair(TokenType.LT, TokenType.LT): 2802 this = self.expression( 2803 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2804 ) 2805 elif self._match_pair(TokenType.GT, TokenType.GT): 2806 this = self.expression( 2807 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2808 ) 2809 else: 2810 break 2811 2812 return this 2813 2814 def _parse_term(self) -> t.Optional[exp.Expression]: 2815 return self._parse_tokens(self._parse_factor, self.TERM) 2816 2817 def _parse_factor(self) -> t.Optional[exp.Expression]: 2818 
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if present, else a typed expression with an
        optional AT TIME ZONE suffix."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a cast-style `<type> <literal>` construct, or a column.

        When a data type is followed by a literal, this models constructs such
        as `DATE '2020-01-01'` as a Cast (or a dialect-specific literal parser).
        Retreats when the "type" turns out to be a plain column reference.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            # A bare type name with no size args was likely an identifier, not a type
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse one type-size argument, e.g. the `10` in VARCHAR(10), with an
        optional trailing modifier variable."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into an `exp.DataType` (or related) node.

        Handles pseudo types, nested types (ARRAY/MAP/STRUCT with (...) or <...>
        parameters), Postgres-style `type[]` array suffixes, timestamp WITH/
        WITHOUT TIME ZONE variants, and INTERVAL types. When `check_func` is
        True, a parenthesized form that could instead be a function call (e.g.
        CHAR(x)) is rejected unless followed by a string literal.
        """
        index = self._index

        # Teradata's SYSUDTLIB schema prefix for user-defined types
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized argument list means this might be a function call
            maybe_func = True

        # Postgres-style array suffixes: type[], type[][] ...
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` after a type token means this wasn't a type after all
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit timezone qualifier rules out the function-call reading
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal: treat as a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name: type` or `name type`, as a column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional AT TIME ZONE <zone> suffix to `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed / cast) column reference.

        Consumes chained column operators: `::type` casts, JSON-style accessors
        from COLUMN_OPERATORS, and plain dots. Dotted names are re-shaped into
        Column(table, db, catalog) parts; dotted function calls (BigQuery's
        x.y.count(...), SAFE.SUBSTR(...)) become Dot chains instead.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts one level: this becomes table/db/catalog of field
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression.

        Handles registered PRIMARY_PARSERS tokens (with implicit concatenation
        of adjacent string literals), `.NUMBER` decimal shorthand, and
        parenthesized subqueries / tuples / expressions, including trailing
        query modifiers and set operations on a parenthesized subquery.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL allows adjacent string literals: 'a' 'b' means 'a' || 'b'
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Leading-dot decimals: .5 parses as 0.5
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call if one begins at the current token.

        Dispatch order: NO_PAREN_FUNCTION_PARSERS, paren-less builtins from
        NO_PAREN_FUNCTIONS, custom FUNCTION_PARSERS, subquery predicates
        (EXISTS(SELECT ...)), then the `functions` registry (defaulting to
        self.FUNCTIONS), falling back to an `exp.Anonymous` call. The result
        is handed to `_parse_window` to pick up an OVER clause.

        Args:
            functions: optional override for the name -> builder registry.
            anonymous: when True, skip registered parsers/builders and always
                build an Anonymous function node.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name plus type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr) or fall back to a
        DISTINCT list / plain select-or-expression function argument.

        The EQ rewrite turns `x = ...` into a Var-keyed named argument. Also
        picks up trailing IGNORE/RESPECT NULLS, ORDER BY and LIMIT, which some
        dialects allow inside aggregate function calls.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints) for `this`.

        First probes for a nested SELECT (in which case `this` is returned
        untouched); the probe always rewinds the token stream via `finally`.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, constraints.

        Returns `this` unchanged when neither a type nor any constraint follows,
        so bare identifiers pass through.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments; with both, build a generated-identity
        constraint instead of a plain auto-increment one."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY [(...)].

        The optional parenthesized options set start/increment/min/max/cycle;
        when the AS clause is not IDENTITY, the parenthesized body is instead a
        generation expression.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] <n> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint, dispatching
        on CONSTRAINT_PARSERS. Returns just the name when no kind follows."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones are limited to
        SCHEMA_UNNAMED_CONSTRAINTS, named ones collect all following kinds."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that is not introduced by CONSTRAINT <name>."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or with a wrapped
        column list as a table constraint."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL).

        Raises a parse error for an ON clause with no recognized action.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event word after ON (e.g. DELETE/UPDATE) is kept verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause (target table, optional column list, options).

        When `match` is False the REFERENCES keyword is assumed to have been
        consumed already.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with its referential actions.

        ON DELETE / ON UPDATE actions are collected into keyword args keyed by
        the lowercased event name.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word actions such as CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a bare column constraint (with optional
        ASC/DESC) or with a wrapped column list and options as a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracket/brace suffix on `this`.

        `{...}` builds a DuckDB-style Struct literal; `[...]` builds an Array
        when `this` is absent or named ARRAY, otherwise a subscript (Bracket)
        with the dialect's index offset applied. Recurses for chained brackets.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:5]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice when followed by a colon (x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN pairs, optional
        ELSE, required END. Allows a trailing OVER clause via `_parse_window`."""
        ifs = []
        default = None

        # Optional operand for the simple CASE form: CASE x WHEN ...
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(cond, t, f) or as the
        statement-style IF cond THEN t [ELSE f] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind so IF can be re-read
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
= self.expression(exp.If, this=condition, true=true, false=false) 3536 3537 return self._parse_window(this) 3538 3539 def _parse_extract(self) -> exp.Expression: 3540 this = self._parse_function() or self._parse_var() or self._parse_type() 3541 3542 if self._match(TokenType.FROM): 3543 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3544 3545 if not self._match(TokenType.COMMA): 3546 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3547 3548 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3549 3550 def _parse_cast(self, strict: bool) -> exp.Expression: 3551 this = self._parse_conjunction() 3552 3553 if not self._match(TokenType.ALIAS): 3554 if self._match(TokenType.COMMA): 3555 return self.expression( 3556 exp.CastToStrType, this=this, expression=self._parse_string() 3557 ) 3558 else: 3559 self.raise_error("Expected AS after CAST") 3560 3561 to = self._parse_types() 3562 3563 if not to: 3564 self.raise_error("Expected TYPE after CAST") 3565 elif to.this == exp.DataType.Type.CHAR: 3566 if self._match(TokenType.CHARACTER_SET): 3567 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3568 3569 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3570 3571 def _parse_string_agg(self) -> exp.Expression: 3572 expression: t.Optional[exp.Expression] 3573 3574 if self._match(TokenType.DISTINCT): 3575 args = self._parse_csv(self._parse_conjunction) 3576 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3577 else: 3578 args = self._parse_csv(self._parse_conjunction) 3579 expression = seq_get(args, 0) 3580 3581 index = self._index 3582 if not self._match(TokenType.R_PAREN): 3583 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3584 order = self._parse_order(this=expression) 3585 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3586 3587 # 
Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3588 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3589 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3590 if not self._match_text_seq("WITHIN", "GROUP"): 3591 self._retreat(index) 3592 this = exp.GroupConcat.from_arg_list(args) 3593 self.validate_expression(this, args) 3594 return this 3595 3596 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3597 order = self._parse_order(this=expression) 3598 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3599 3600 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3601 to: t.Optional[exp.Expression] 3602 this = self._parse_bitwise() 3603 3604 if self._match(TokenType.USING): 3605 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3606 elif self._match(TokenType.COMMA): 3607 to = self._parse_bitwise() 3608 else: 3609 to = None 3610 3611 # Swap the argument order if needed to produce the correct AST 3612 if self.CONVERT_TYPE_FIRST: 3613 this, to = to, this 3614 3615 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3616 3617 def _parse_decode(self) -> t.Optional[exp.Expression]: 3618 """ 3619 There are generally two variants of the DECODE function: 3620 3621 - DECODE(bin, charset) 3622 - DECODE(expression, search, result [, search, result] ... [, default]) 3623 3624 The second variant will always be parsed into a CASE expression. Note that NULL 3625 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3626 instead of relying on pattern matching. 
3627 """ 3628 args = self._parse_csv(self._parse_conjunction) 3629 3630 if len(args) < 3: 3631 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3632 3633 expression, *expressions = args 3634 if not expression: 3635 return None 3636 3637 ifs = [] 3638 for search, result in zip(expressions[::2], expressions[1::2]): 3639 if not search or not result: 3640 return None 3641 3642 if isinstance(search, exp.Literal): 3643 ifs.append( 3644 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3645 ) 3646 elif isinstance(search, exp.Null): 3647 ifs.append( 3648 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3649 ) 3650 else: 3651 cond = exp.or_( 3652 exp.EQ(this=expression.copy(), expression=search), 3653 exp.and_( 3654 exp.Is(this=expression.copy(), expression=exp.Null()), 3655 exp.Is(this=search.copy(), expression=exp.Null()), 3656 copy=False, 3657 ), 3658 copy=False, 3659 ) 3660 ifs.append(exp.If(this=cond, true=result)) 3661 3662 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3663 3664 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3665 self._match_text_seq("KEY") 3666 key = self._parse_field() 3667 self._match(TokenType.COLON) 3668 self._match_text_seq("VALUE") 3669 value = self._parse_field() 3670 if not key and not value: 3671 return None 3672 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3673 3674 def _parse_json_object(self) -> exp.Expression: 3675 expressions = self._parse_csv(self._parse_json_key_value) 3676 3677 null_handling = None 3678 if self._match_text_seq("NULL", "ON", "NULL"): 3679 null_handling = "NULL ON NULL" 3680 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3681 null_handling = "ABSENT ON NULL" 3682 3683 unique_keys = None 3684 if self._match_text_seq("WITH", "UNIQUE"): 3685 unique_keys = True 3686 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3687 unique_keys = False 3688 3689 
self._match_text_seq("KEYS") 3690 3691 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3692 format_json = self._match_text_seq("FORMAT", "JSON") 3693 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3694 3695 return self.expression( 3696 exp.JSONObject, 3697 expressions=expressions, 3698 null_handling=null_handling, 3699 unique_keys=unique_keys, 3700 return_type=return_type, 3701 format_json=format_json, 3702 encoding=encoding, 3703 ) 3704 3705 def _parse_logarithm(self) -> exp.Expression: 3706 # Default argument order is base, expression 3707 args = self._parse_csv(self._parse_range) 3708 3709 if len(args) > 1: 3710 if not self.LOG_BASE_FIRST: 3711 args.reverse() 3712 return exp.Log.from_arg_list(args) 3713 3714 return self.expression( 3715 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3716 ) 3717 3718 def _parse_match_against(self) -> exp.Expression: 3719 expressions = self._parse_csv(self._parse_column) 3720 3721 self._match_text_seq(")", "AGAINST", "(") 3722 3723 this = self._parse_string() 3724 3725 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3726 modifier = "IN NATURAL LANGUAGE MODE" 3727 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3728 modifier = f"{modifier} WITH QUERY EXPANSION" 3729 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3730 modifier = "IN BOOLEAN MODE" 3731 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3732 modifier = "WITH QUERY EXPANSION" 3733 else: 3734 modifier = None 3735 3736 return self.expression( 3737 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3738 ) 3739 3740 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3741 def _parse_open_json(self) -> exp.Expression: 3742 this = self._parse_bitwise() 3743 path = self._match(TokenType.COMMA) and self._parse_string() 3744 3745 def _parse_open_json_column_def() -> exp.Expression: 3746 this = 
self._parse_field(any_token=True) 3747 kind = self._parse_types() 3748 path = self._parse_string() 3749 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3750 return self.expression( 3751 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3752 ) 3753 3754 expressions = None 3755 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3756 self._match_l_paren() 3757 expressions = self._parse_csv(_parse_open_json_column_def) 3758 3759 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3760 3761 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3762 args = self._parse_csv(self._parse_bitwise) 3763 3764 if self._match(TokenType.IN): 3765 return self.expression( 3766 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3767 ) 3768 3769 if haystack_first: 3770 haystack = seq_get(args, 0) 3771 needle = seq_get(args, 1) 3772 else: 3773 needle = seq_get(args, 0) 3774 haystack = seq_get(args, 1) 3775 3776 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3777 3778 self.validate_expression(this, args) 3779 3780 return this 3781 3782 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3783 args = self._parse_csv(self._parse_table) 3784 return exp.JoinHint(this=func_name.upper(), expressions=args) 3785 3786 def _parse_substring(self) -> exp.Expression: 3787 # Postgres supports the form: substring(string [from int] [for int]) 3788 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3789 3790 args = self._parse_csv(self._parse_bitwise) 3791 3792 if self._match(TokenType.FROM): 3793 args.append(self._parse_bitwise()) 3794 if self._match(TokenType.FOR): 3795 args.append(self._parse_bitwise()) 3796 3797 this = exp.Substring.from_arg_list(args) 3798 self.validate_expression(this, args) 3799 3800 return this 3801 3802 def _parse_trim(self) -> exp.Expression: 3803 # 
https://www.w3resource.com/sql/character-functions/trim.php 3804 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3805 3806 position = None 3807 collation = None 3808 3809 if self._match_texts(self.TRIM_TYPES): 3810 position = self._prev.text.upper() 3811 3812 expression = self._parse_bitwise() 3813 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3814 this = self._parse_bitwise() 3815 else: 3816 this = expression 3817 expression = None 3818 3819 if self._match(TokenType.COLLATE): 3820 collation = self._parse_bitwise() 3821 3822 return self.expression( 3823 exp.Trim, 3824 this=this, 3825 position=position, 3826 expression=expression, 3827 collation=collation, 3828 ) 3829 3830 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3831 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3832 3833 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3834 return self._parse_window(self._parse_id_var(), alias=True) 3835 3836 def _parse_respect_or_ignore_nulls( 3837 self, this: t.Optional[exp.Expression] 3838 ) -> t.Optional[exp.Expression]: 3839 if self._match_text_seq("IGNORE", "NULLS"): 3840 return self.expression(exp.IgnoreNulls, this=this) 3841 if self._match_text_seq("RESPECT", "NULLS"): 3842 return self.expression(exp.RespectNulls, this=this) 3843 return this 3844 3845 def _parse_window( 3846 self, this: t.Optional[exp.Expression], alias: bool = False 3847 ) -> t.Optional[exp.Expression]: 3848 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3849 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3850 self._match_r_paren() 3851 3852 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3853 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3854 if self._match_text_seq("WITHIN", "GROUP"): 3855 order = self._parse_wrapped(self._parse_order) 3856 this = self.expression(exp.WithinGroup, this=this, expression=order) 3857 3858 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3859 # Some dialects choose to implement and some do not. 3860 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3861 3862 # There is some code above in _parse_lambda that handles 3863 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3864 3865 # The below changes handle 3866 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3867 3868 # Oracle allows both formats 3869 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3870 # and Snowflake chose to do the same for familiarity 3871 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3872 this = self._parse_respect_or_ignore_nulls(this) 3873 3874 # bigquery select from window x AS (partition by ...) 
3875 if alias: 3876 over = None 3877 self._match(TokenType.ALIAS) 3878 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3879 return this 3880 else: 3881 over = self._prev.text.upper() 3882 3883 if not self._match(TokenType.L_PAREN): 3884 return self.expression( 3885 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3886 ) 3887 3888 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3889 3890 first = self._match(TokenType.FIRST) 3891 if self._match_text_seq("LAST"): 3892 first = False 3893 3894 partition = self._parse_partition_by() 3895 order = self._parse_order() 3896 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3897 3898 if kind: 3899 self._match(TokenType.BETWEEN) 3900 start = self._parse_window_spec() 3901 self._match(TokenType.AND) 3902 end = self._parse_window_spec() 3903 3904 spec = self.expression( 3905 exp.WindowSpec, 3906 kind=kind, 3907 start=start["value"], 3908 start_side=start["side"], 3909 end=end["value"], 3910 end_side=end["side"], 3911 ) 3912 else: 3913 spec = None 3914 3915 self._match_r_paren() 3916 3917 return self.expression( 3918 exp.Window, 3919 this=this, 3920 partition_by=partition, 3921 order=order, 3922 spec=spec, 3923 alias=window_alias, 3924 over=over, 3925 first=first, 3926 ) 3927 3928 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3929 self._match(TokenType.BETWEEN) 3930 3931 return { 3932 "value": ( 3933 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 3934 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 3935 or self._parse_bitwise() 3936 ), 3937 "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text, 3938 } 3939 3940 def _parse_alias( 3941 self, this: t.Optional[exp.Expression], explicit: bool = False 3942 ) -> t.Optional[exp.Expression]: 3943 any_token = self._match(TokenType.ALIAS) 3944 3945 if explicit and not any_token: 3946 return this 3947 3948 if 
self._match(TokenType.L_PAREN): 3949 aliases = self.expression( 3950 exp.Aliases, 3951 this=this, 3952 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3953 ) 3954 self._match_r_paren(aliases) 3955 return aliases 3956 3957 alias = self._parse_id_var(any_token) 3958 3959 if alias: 3960 return self.expression(exp.Alias, this=this, alias=alias) 3961 3962 return this 3963 3964 def _parse_id_var( 3965 self, 3966 any_token: bool = True, 3967 tokens: t.Optional[t.Collection[TokenType]] = None, 3968 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3969 ) -> t.Optional[exp.Expression]: 3970 identifier = self._parse_identifier() 3971 3972 if identifier: 3973 return identifier 3974 3975 prefix = "" 3976 3977 if prefix_tokens: 3978 while self._match_set(prefix_tokens): 3979 prefix += self._prev.text 3980 3981 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3982 quoted = self._prev.token_type == TokenType.STRING 3983 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3984 3985 return None 3986 3987 def _parse_string(self) -> t.Optional[exp.Expression]: 3988 if self._match(TokenType.STRING): 3989 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3990 return self._parse_placeholder() 3991 3992 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 3993 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 3994 3995 def _parse_number(self) -> t.Optional[exp.Expression]: 3996 if self._match(TokenType.NUMBER): 3997 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3998 return self._parse_placeholder() 3999 4000 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4001 if self._match(TokenType.IDENTIFIER): 4002 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4003 return self._parse_placeholder() 4004 4005 def _parse_var( 4006 self, any_token: bool = False, tokens: 
t.Optional[t.Collection[TokenType]] = None 4007 ) -> t.Optional[exp.Expression]: 4008 if ( 4009 (any_token and self._advance_any()) 4010 or self._match(TokenType.VAR) 4011 or (self._match_set(tokens) if tokens else False) 4012 ): 4013 return self.expression(exp.Var, this=self._prev.text) 4014 return self._parse_placeholder() 4015 4016 def _advance_any(self) -> t.Optional[Token]: 4017 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4018 self._advance() 4019 return self._prev 4020 return None 4021 4022 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4023 return self._parse_var() or self._parse_string() 4024 4025 def _parse_null(self) -> t.Optional[exp.Expression]: 4026 if self._match(TokenType.NULL): 4027 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4028 return None 4029 4030 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4031 if self._match(TokenType.TRUE): 4032 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4033 if self._match(TokenType.FALSE): 4034 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4035 return None 4036 4037 def _parse_star(self) -> t.Optional[exp.Expression]: 4038 if self._match(TokenType.STAR): 4039 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4040 return None 4041 4042 def _parse_parameter(self) -> exp.Expression: 4043 wrapped = self._match(TokenType.L_BRACE) 4044 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4045 self._match(TokenType.R_BRACE) 4046 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4047 4048 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4049 if self._match_set(self.PLACEHOLDER_PARSERS): 4050 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4051 if placeholder: 4052 return placeholder 4053 self._advance(-1) 4054 return None 4055 4056 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4057 if not 
self._match(TokenType.EXCEPT): 4058 return None 4059 if self._match(TokenType.L_PAREN, advance=False): 4060 return self._parse_wrapped_csv(self._parse_column) 4061 return self._parse_csv(self._parse_column) 4062 4063 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4064 if not self._match(TokenType.REPLACE): 4065 return None 4066 if self._match(TokenType.L_PAREN, advance=False): 4067 return self._parse_wrapped_csv(self._parse_expression) 4068 return self._parse_csv(self._parse_expression) 4069 4070 def _parse_csv( 4071 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4072 ) -> t.List[t.Optional[exp.Expression]]: 4073 parse_result = parse_method() 4074 items = [parse_result] if parse_result is not None else [] 4075 4076 while self._match(sep): 4077 self._add_comments(parse_result) 4078 parse_result = parse_method() 4079 if parse_result is not None: 4080 items.append(parse_result) 4081 4082 return items 4083 4084 def _parse_tokens( 4085 self, parse_method: t.Callable, expressions: t.Dict 4086 ) -> t.Optional[exp.Expression]: 4087 this = parse_method() 4088 4089 while self._match_set(expressions): 4090 this = self.expression( 4091 expressions[self._prev.token_type], 4092 this=this, 4093 comments=self._prev_comments, 4094 expression=parse_method(), 4095 ) 4096 4097 return this 4098 4099 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4100 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4101 4102 def _parse_wrapped_csv( 4103 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4104 ) -> t.List[t.Optional[exp.Expression]]: 4105 return self._parse_wrapped( 4106 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4107 ) 4108 4109 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4110 wrapped = self._match(TokenType.L_PAREN) 4111 if not wrapped and not optional: 4112 
self.raise_error("Expecting (") 4113 parse_result = parse_method() 4114 if wrapped: 4115 self._match_r_paren() 4116 return parse_result 4117 4118 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4119 return self._parse_select() or self._parse_set_operations( 4120 self._parse_expression() if alias else self._parse_conjunction() 4121 ) 4122 4123 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4124 return self._parse_set_operations( 4125 self._parse_select(nested=True, parse_subquery_alias=False) 4126 ) 4127 4128 def _parse_transaction(self) -> exp.Expression: 4129 this = None 4130 if self._match_texts(self.TRANSACTION_KIND): 4131 this = self._prev.text 4132 4133 self._match_texts({"TRANSACTION", "WORK"}) 4134 4135 modes = [] 4136 while True: 4137 mode = [] 4138 while self._match(TokenType.VAR): 4139 mode.append(self._prev.text) 4140 4141 if mode: 4142 modes.append(" ".join(mode)) 4143 if not self._match(TokenType.COMMA): 4144 break 4145 4146 return self.expression(exp.Transaction, this=this, modes=modes) 4147 4148 def _parse_commit_or_rollback(self) -> exp.Expression: 4149 chain = None 4150 savepoint = None 4151 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4152 4153 self._match_texts({"TRANSACTION", "WORK"}) 4154 4155 if self._match_text_seq("TO"): 4156 self._match_text_seq("SAVEPOINT") 4157 savepoint = self._parse_id_var() 4158 4159 if self._match(TokenType.AND): 4160 chain = not self._match_text_seq("NO") 4161 self._match_text_seq("CHAIN") 4162 4163 if is_rollback: 4164 return self.expression(exp.Rollback, savepoint=savepoint) 4165 return self.expression(exp.Commit, chain=chain) 4166 4167 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4168 if not self._match_text_seq("ADD"): 4169 return None 4170 4171 self._match(TokenType.COLUMN) 4172 exists_column = self._parse_exists(not_=True) 4173 expression = self._parse_column_def(self._parse_field(any_token=True)) 4174 4175 if expression: 4176 
expression.set("exists", exists_column) 4177 4178 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4179 if self._match_texts(("FIRST", "AFTER")): 4180 position = self._prev.text 4181 column_position = self.expression( 4182 exp.ColumnPosition, this=self._parse_column(), position=position 4183 ) 4184 expression.set("position", column_position) 4185 4186 return expression 4187 4188 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4189 drop = self._match(TokenType.DROP) and self._parse_drop() 4190 if drop and not isinstance(drop, exp.Command): 4191 drop.set("kind", drop.args.get("kind", "COLUMN")) 4192 return drop 4193 4194 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4195 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4196 return self.expression( 4197 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4198 ) 4199 4200 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4201 this = None 4202 kind = self._prev.token_type 4203 4204 if kind == TokenType.CONSTRAINT: 4205 this = self._parse_id_var() 4206 4207 if self._match_text_seq("CHECK"): 4208 expression = self._parse_wrapped(self._parse_conjunction) 4209 enforced = self._match_text_seq("ENFORCED") 4210 4211 return self.expression( 4212 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4213 ) 4214 4215 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4216 expression = self._parse_foreign_key() 4217 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4218 expression = self._parse_primary_key() 4219 else: 4220 expression = None 4221 4222 return self.expression(exp.AddConstraint, this=this, expression=expression) 4223 4224 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4225 index = self._index - 1 4226 4227 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4228 
return self._parse_csv(self._parse_add_constraint) 4229 4230 self._retreat(index) 4231 return self._parse_csv(self._parse_add_column) 4232 4233 def _parse_alter_table_alter(self) -> exp.Expression: 4234 self._match(TokenType.COLUMN) 4235 column = self._parse_field(any_token=True) 4236 4237 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4238 return self.expression(exp.AlterColumn, this=column, drop=True) 4239 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4240 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4241 4242 self._match_text_seq("SET", "DATA") 4243 return self.expression( 4244 exp.AlterColumn, 4245 this=column, 4246 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4247 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4248 using=self._match(TokenType.USING) and self._parse_conjunction(), 4249 ) 4250 4251 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4252 index = self._index - 1 4253 4254 partition_exists = self._parse_exists() 4255 if self._match(TokenType.PARTITION, advance=False): 4256 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4257 4258 self._retreat(index) 4259 return self._parse_csv(self._parse_drop_column) 4260 4261 def _parse_alter_table_rename(self) -> exp.Expression: 4262 self._match_text_seq("TO") 4263 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4264 4265 def _parse_alter(self) -> t.Optional[exp.Expression]: 4266 start = self._prev 4267 4268 if not self._match(TokenType.TABLE): 4269 return self._parse_as_command(start) 4270 4271 exists = self._parse_exists() 4272 this = self._parse_table(schema=True) 4273 4274 if self._next: 4275 self._advance() 4276 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4277 4278 if parser: 4279 actions = ensure_list(parser(self)) 4280 4281 if not self._curr: 4282 return self.expression( 4283 exp.AlterTable, 
4284 this=this, 4285 exists=exists, 4286 actions=actions, 4287 ) 4288 return self._parse_as_command(start) 4289 4290 def _parse_merge(self) -> exp.Expression: 4291 self._match(TokenType.INTO) 4292 target = self._parse_table() 4293 4294 self._match(TokenType.USING) 4295 using = self._parse_table() 4296 4297 self._match(TokenType.ON) 4298 on = self._parse_conjunction() 4299 4300 whens = [] 4301 while self._match(TokenType.WHEN): 4302 matched = not self._match(TokenType.NOT) 4303 self._match_text_seq("MATCHED") 4304 source = ( 4305 False 4306 if self._match_text_seq("BY", "TARGET") 4307 else self._match_text_seq("BY", "SOURCE") 4308 ) 4309 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4310 4311 self._match(TokenType.THEN) 4312 4313 if self._match(TokenType.INSERT): 4314 _this = self._parse_star() 4315 if _this: 4316 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4317 else: 4318 then = self.expression( 4319 exp.Insert, 4320 this=self._parse_value(), 4321 expression=self._match(TokenType.VALUES) and self._parse_value(), 4322 ) 4323 elif self._match(TokenType.UPDATE): 4324 expressions = self._parse_star() 4325 if expressions: 4326 then = self.expression(exp.Update, expressions=expressions) 4327 else: 4328 then = self.expression( 4329 exp.Update, 4330 expressions=self._match(TokenType.SET) 4331 and self._parse_csv(self._parse_equality), 4332 ) 4333 elif self._match(TokenType.DELETE): 4334 then = self.expression(exp.Var, this=self._prev.text) 4335 else: 4336 then = None 4337 4338 whens.append( 4339 self.expression( 4340 exp.When, 4341 matched=matched, 4342 source=source, 4343 condition=condition, 4344 then=then, 4345 ) 4346 ) 4347 4348 return self.expression( 4349 exp.Merge, 4350 this=target, 4351 using=using, 4352 on=on, 4353 expressions=whens, 4354 ) 4355 4356 def _parse_show(self) -> t.Optional[exp.Expression]: 4357 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4358 if parser: 
            return parser(self)
        # No dedicated SHOW parser matched: consume the next token and emit a
        # generic Show node keyed by that token's upper-cased text.
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment of the form ``name = value`` or ``name TO value``.

        Args:
            kind: optional qualifier such as ``"GLOBAL"`` or ``"SESSION"``.

        Returns:
            A ``SetItem`` expression, or ``None`` when no assignment is found
            (the token position is restored in that case).
        """
        index = self._index

        # SET GLOBAL/SESSION TRANSACTION ... is delegated to the dedicated
        # transaction-characteristics parser.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment after all; rewind to the saved position.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse ``[SET] TRANSACTION <characteristic>, ...`` into a SetItem.

        Args:
            global_: whether the statement was qualified with GLOBAL.
        """
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        # "global" is a Python keyword, hence the dict-splat spelling.
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single item of a SET statement, preferring dialect-specific parsers."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; fall back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        # Unconsumed tokens mean we failed to fully parse the statement, so
        # preserve it verbatim as an opaque command instead.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Return a Var for the first multi-word option that matches the token stream."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in a Command node.

        The first ``len(start.text)`` characters become the command keyword;
        the remainder is kept as the unparsed expression text.
        """
        while self._curr:
            self._advance()

        text = self._find_sql(start, self._prev)
        size = len(start.text)

        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a parser callable keyed by the upcoming (possibly multi-word) tokens.

        Walks the token stream against ``trie``; on a full match returns the
        corresponding entry of ``parsers``, otherwise restores the position
        and returns ``None``.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No prefix match: give up.
                break
            if result == 2:
                # Full key matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token matches ``token_type``.

        Any comments on the matched token are attached to ``expression``.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True and (optionally) advance if the current token type is in ``types``."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True and (optionally) advance past two consecutive matching tokens."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ``(`` token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ``)`` token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True and (optionally) advance if the current token's upper-cased text is in ``texts``."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; restore position on failure.

        With ``advance=False`` the position is restored even on success
        (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.column; a bare one becomes a Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters with plain identifiers.

        Args:
            node: the lambda body to rewrite (may be ``None``).
            lambda_variables: the names bound by the enclosing lambda.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot so the whole chain is replaced;
                # the while-else runs only when the column had no Dot parent.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g., ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store parser configuration and reset all per-parse state.

        Args:
            error_level: the desired error level; ``None`` falls back to
                ``ErrorLevel.IMMEDIATE`` (NOTE(review): the class docstring
                advertises RAISE — the code here is what actually applies).
            error_message_context: number of characters of query context to
                include in error messages. Default: 100.
            index_offset: index offset for arrays (ARRAY[0] vs ARRAY[1]).
            unnest_column_only: whether UNNEST aliases apply to columns only.
            alias_post_tablesample: whether the table alias comes after
                TABLESAMPLE.
            max_errors: maximum number of error messages included in a raised
                ParseError (only relevant for ErrorLevel.RAISE).
            null_ordering: default null ordering method when not explicit,
                e.g. "nulls_are_small", "nulls_are_large", "nulls_are_last".
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
810 def parse( 811 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 812 ) -> t.List[t.Optional[exp.Expression]]: 813 """ 814 Parses a list of tokens and returns a list of syntax trees, one tree 815 per parsed SQL statement. 816 817 Args: 818 raw_tokens: the list of tokens. 819 sql: the original SQL string, used to produce helpful debug messages. 820 821 Returns: 822 The list of syntax trees. 823 """ 824 return self._parse( 825 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 826 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
828 def parse_into( 829 self, 830 expression_types: exp.IntoType, 831 raw_tokens: t.List[Token], 832 sql: t.Optional[str] = None, 833 ) -> t.List[t.Optional[exp.Expression]]: 834 """ 835 Parses a list of tokens into a given Expression type. If a collection of Expression 836 types is given instead, this method will try to parse the token list into each one 837 of them, stopping at the first for which the parsing succeeds. 838 839 Args: 840 expression_types: the expression type(s) to try and parse the token list into. 841 raw_tokens: the list of tokens. 842 sql: the original SQL string, used to produce helpful debug messages. 843 844 Returns: 845 The target Expression. 846 """ 847 errors = [] 848 for expression_type in ensure_collection(expression_types): 849 parser = self.EXPRESSION_PARSERS.get(expression_type) 850 if not parser: 851 raise TypeError(f"No parser registered for {expression_type}") 852 try: 853 return self._parse(parser, raw_tokens, sql) 854 except ParseError as e: 855 e.errors[0]["into_expression"] = expression_type 856 errors.append(e) 857 raise ParseError( 858 f"Failed to parse into {expression_types}", 859 errors=merge_errors(errors), 860 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
896 def check_errors(self) -> None: 897 """ 898 Logs or raises any found errors, depending on the chosen error level setting. 899 """ 900 if self.error_level == ErrorLevel.WARN: 901 for error in self.errors: 902 logger.error(str(error)) 903 elif self.error_level == ErrorLevel.RAISE and self.errors: 904 raise ParseError( 905 concat_messages(self.errors, self.max_errors), 906 errors=merge_errors(self.errors), 907 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token the error points at; defaults to the current or
                previous token so the highlighted context matches the failure.
        """
        # Fall back to an empty token so the slicing below is always safe.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the offending SQL snippet in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
937 def expression( 938 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 939 ) -> E: 940 """ 941 Creates a new, validated Expression. 942 943 Args: 944 exp_class: the expression class to instantiate. 945 comments: an optional list of comments to attach to the expression. 946 kwargs: the arguments to set for the expression along with their respective values. 947 948 Returns: 949 The target expression. 950 """ 951 instance = exp_class(**kwargs) 952 instance.add_comments(comments) if comments else self._add_comments(instance) 953 self.validate_expression(instance) 954 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
961 def validate_expression( 962 self, expression: exp.Expression, args: t.Optional[t.List] = None 963 ) -> None: 964 """ 965 Validates an already instantiated expression, making sure that all its mandatory arguments 966 are set. 967 968 Args: 969 expression: the expression to validate. 970 args: an optional list of items that was used to instantiate the expression, if it's a Func. 971 """ 972 if self.error_level == ErrorLevel.IGNORE: 973 return 974 975 for error_message in expression.error_messages(args): 976 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.