sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13if t.TYPE_CHECKING: 14 from sqlglot._typing import E 15 16logger = logging.getLogger("sqlglot") 17 18 19def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 20 if len(args) == 1 and args[0].is_star: 21 return exp.StarMap(this=args[0]) 22 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def parse_like(args: t.List) -> exp.Expression: 35 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 36 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 37 38 39def binary_range_parser( 40 expr_type: t.Type[exp.Expression], 41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 42 return lambda self, this: self._parse_escape( 43 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 44 ) 45 46 47class _Parser(type): 48 def __new__(cls, clsname, bases, attrs): 49 klass = super().__new__(cls, clsname, bases, attrs) 50 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 51 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 52 53 return klass 54 55 56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 
63 Default: ErrorLevel.IMMEDIATE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 
# All tokens that may start a data type; includes the nested type tokens.
TYPE_TOKENS = {
    TokenType.BIT,
    TokenType.BOOLEAN,
    TokenType.TINYINT,
    TokenType.UTINYINT,
    TokenType.SMALLINT,
    TokenType.USMALLINT,
    TokenType.INT,
    TokenType.UINT,
    TokenType.BIGINT,
    TokenType.UBIGINT,
    TokenType.INT128,
    TokenType.UINT128,
    TokenType.INT256,
    TokenType.UINT256,
    TokenType.FLOAT,
    TokenType.DOUBLE,
    TokenType.CHAR,
    TokenType.NCHAR,
    TokenType.VARCHAR,
    TokenType.NVARCHAR,
    TokenType.TEXT,
    TokenType.MEDIUMTEXT,
    TokenType.LONGTEXT,
    TokenType.MEDIUMBLOB,
    TokenType.LONGBLOB,
    TokenType.BINARY,
    TokenType.VARBINARY,
    TokenType.JSON,
    TokenType.JSONB,
    TokenType.INTERVAL,
    TokenType.TIME,
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
    TokenType.TIMESTAMPLTZ,
    TokenType.DATETIME,
    TokenType.DATETIME64,
    TokenType.DATE,
    TokenType.INT4RANGE,
    TokenType.INT4MULTIRANGE,
    TokenType.INT8RANGE,
    TokenType.INT8MULTIRANGE,
    TokenType.NUMRANGE,
    TokenType.NUMMULTIRANGE,
    TokenType.TSRANGE,
    TokenType.TSMULTIRANGE,
    TokenType.TSTZRANGE,
    TokenType.TSTZMULTIRANGE,
    TokenType.DATERANGE,
    TokenType.DATEMULTIRANGE,
    TokenType.DECIMAL,
    TokenType.BIGDECIMAL,
    TokenType.UUID,
    TokenType.GEOGRAPHY,
    TokenType.GEOMETRY,
    TokenType.HLLSKETCH,
    TokenType.HSTORE,
    TokenType.PSEUDO_TYPE,
    TokenType.SUPER,
    TokenType.SERIAL,
    TokenType.SMALLSERIAL,
    TokenType.BIGSERIAL,
    TokenType.XML,
    TokenType.UNIQUEIDENTIFIER,
    TokenType.MONEY,
    TokenType.SMALLMONEY,
    TokenType.ROWVERSION,
    TokenType.IMAGE,
    TokenType.VARIANT,
    TokenType.OBJECT,
    TokenType.INET,
    *NESTED_TYPE_TOKENS,
}

# Predicate keywords that may prefix a subquery (e.g. = ANY (SELECT ...)).
# Note SOME is an alias of ANY.
SUBQUERY_PREDICATES = {
    TokenType.ANY: exp.Any,
    TokenType.ALL: exp.All,
    TokenType.EXISTS: exp.Exists,
    TokenType.SOME: exp.Any,
}

RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

# Object kinds creatable at the database level.
DB_CREATABLES = {
    TokenType.DATABASE,
    TokenType.SCHEMA,
    TokenType.TABLE,
    TokenType.VIEW,
    TokenType.DICTIONARY,
}

# Everything CREATE / DROP / COMMENT ON may target.
CREATABLES = {
    TokenType.COLUMN,
    TokenType.FUNCTION,
    TokenType.INDEX,
    TokenType.PROCEDURE,
    *DB_CREATABLES,
}

# Tokens that may be treated as identifiers / variable names.
ID_VAR_TOKENS = {
    TokenType.VAR,
    TokenType.ANTI,
    TokenType.APPLY,
    TokenType.ASC,
    TokenType.AUTO_INCREMENT,
    TokenType.BEGIN,
    TokenType.CACHE,
    TokenType.COLLATE,
    TokenType.COMMAND,
    TokenType.COMMENT,
    TokenType.COMMIT,
    TokenType.CONSTRAINT,
    TokenType.DEFAULT,
    TokenType.DELETE,
    TokenType.DESC,
    TokenType.DESCRIBE,
    TokenType.DICTIONARY,
    TokenType.DIV,
    TokenType.END,
    TokenType.EXECUTE,
    TokenType.ESCAPE,
    TokenType.FALSE,
    TokenType.FIRST,
    TokenType.FILTER,
    TokenType.FORMAT,
    TokenType.FULL,
    TokenType.IF,
    TokenType.IS,
    TokenType.ISNULL,
    TokenType.INTERVAL,
    TokenType.KEEP,
    TokenType.LEFT,
    TokenType.LOAD,
    TokenType.MERGE,
    TokenType.NATURAL,
    TokenType.NEXT,
    TokenType.OFFSET,
    TokenType.ORDINALITY,
    TokenType.OVERWRITE,
    TokenType.PARTITION,
    TokenType.PERCENT,
    TokenType.PIVOT,
    TokenType.PRAGMA,
    TokenType.RANGE,
    TokenType.REFERENCES,
    TokenType.RIGHT,
    TokenType.ROW,
    TokenType.ROWS,
    TokenType.SEMI,
    TokenType.SET,
    TokenType.SETTINGS,
    TokenType.SHOW,
    TokenType.TEMPORARY,
    TokenType.TOP,
    TokenType.TRUE,
    TokenType.UNIQUE,
    TokenType.UNPIVOT,
    TokenType.VOLATILE,
    TokenType.WINDOW,
    *CREATABLES,
    *SUBQUERY_PREDICATES,
    *TYPE_TOKENS,
    *NO_PAREN_FUNCTIONS,
}

INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

# Identifier tokens minus those that would be ambiguous after a table
# reference (join sides, APPLY, OFFSET, etc.).
TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
    TokenType.APPLY,
    TokenType.ASOF,
    TokenType.FULL,
    TokenType.LEFT,
    TokenType.LOCK,
    TokenType.NATURAL,
    TokenType.OFFSET,
    TokenType.RIGHT,
    TokenType.WINDOW,
}

COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

# Tokens that may be followed by a parenthesized argument list.
FUNC_TOKENS = {
    TokenType.COMMAND,
    TokenType.CURRENT_DATE,
    TokenType.CURRENT_DATETIME,
    TokenType.CURRENT_TIMESTAMP,
    TokenType.CURRENT_TIME,
    TokenType.CURRENT_USER,
    TokenType.FILTER,
    TokenType.FIRST,
    TokenType.FORMAT,
    TokenType.GLOB,
    TokenType.IDENTIFIER,
    TokenType.INDEX,
    TokenType.ISNULL,
    TokenType.ILIKE,
    TokenType.LIKE,
    TokenType.MERGE,
    TokenType.OFFSET,
    TokenType.PRIMARY_KEY,
    TokenType.RANGE,
    TokenType.REPLACE,
    TokenType.ROW,
    TokenType.UNNEST,
    TokenType.VAR,
    TokenType.LEFT,
    TokenType.RIGHT,
    TokenType.DATE,
    TokenType.DATETIME,
    TokenType.TABLE,
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
    TokenType.WINDOW,
    *TYPE_TOKENS,
    *SUBQUERY_PREDICATES,
}

# Operator precedence tiers, lowest to highest: conjunction, equality,
# comparison, bitwise, term, factor.
CONJUNCTION = {
    TokenType.AND: exp.And,
    TokenType.OR: exp.Or,
}

EQUALITY = {
    TokenType.EQ: exp.EQ,
    TokenType.NEQ: exp.NEQ,
    TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
}

COMPARISON = {
    TokenType.GT: exp.GT,
    TokenType.GTE: exp.GTE,
    TokenType.LT: exp.LT,
    TokenType.LTE: exp.LTE,
}

BITWISE = {
    TokenType.AMP: exp.BitwiseAnd,
    TokenType.CARET: exp.BitwiseXor,
    TokenType.PIPE: exp.BitwiseOr,
    TokenType.DPIPE: exp.DPipe,
}

TERM = {
    TokenType.DASH: exp.Sub,
    TokenType.PLUS: exp.Add,
    TokenType.MOD: exp.Mod,
    TokenType.COLLATE: exp.Collate,
}

FACTOR = {
    TokenType.DIV: exp.IntDiv,
    TokenType.LR_ARROW: exp.Distance,
    TokenType.SLASH: exp.Div,
    TokenType.STAR: exp.Mul,
}

TIMESTAMPS = {
    TokenType.TIME,
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
    TokenType.TIMESTAMPLTZ,
}

SET_OPERATIONS = {
    TokenType.UNION,
    TokenType.INTERSECT,
    TokenType.EXCEPT,
}
# JOIN modifiers, split by grammatical slot: method, side, kind.
JOIN_METHODS = {
    TokenType.NATURAL,
    TokenType.ASOF,
}

JOIN_SIDES = {
    TokenType.LEFT,
    TokenType.RIGHT,
    TokenType.FULL,
}

JOIN_KINDS = {
    TokenType.INNER,
    TokenType.OUTER,
    TokenType.CROSS,
    TokenType.SEMI,
    TokenType.ANTI,
}

# Lambda-like constructs: `args -> body` and keyword args `name => value`.
LAMBDAS = {
    TokenType.ARROW: lambda self, expressions: self.expression(
        exp.Lambda,
        this=self._replace_lambda(
            self._parse_conjunction(),
            {node.name for node in expressions},
        ),
        expressions=expressions,
    ),
    TokenType.FARROW: lambda self, expressions: self.expression(
        exp.Kwarg,
        this=exp.Var(this=expressions[0].name),
        expression=self._parse_conjunction(),
    ),
}

# Postfix operators that may follow a column; DOT is handled specially (None).
COLUMN_OPERATORS = {
    TokenType.DOT: None,
    TokenType.DCOLON: lambda self, this, to: self.expression(
        exp.Cast if self.STRICT_CAST else exp.TryCast,
        this=this,
        to=to,
    ),
    TokenType.ARROW: lambda self, this, path: self.expression(
        exp.JSONExtract,
        this=this,
        expression=path,
    ),
    TokenType.DARROW: lambda self, this, path: self.expression(
        exp.JSONExtractScalar,
        this=this,
        expression=path,
    ),
    TokenType.HASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtract,
        this=this,
        expression=path,
    ),
    TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtractScalar,
        this=this,
        expression=path,
    ),
    TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
        exp.JSONBContains,
        this=this,
        expression=key,
    ),
}

# Used by parse_into: maps a target Expression type to the parse method
# that produces it.
EXPRESSION_PARSERS = {
    exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
    exp.Column: lambda self: self._parse_column(),
    exp.Condition: lambda self: self._parse_conjunction(),
    exp.DataType: lambda self: self._parse_types(),
    exp.Expression: lambda self: self._parse_statement(),
    exp.From: lambda self: self._parse_from(),
    exp.Group: lambda self: self._parse_group(),
    exp.Having: lambda self: self._parse_having(),
    exp.Identifier: lambda self: self._parse_id_var(),
    exp.Join: lambda self: self._parse_join(),
    exp.Lambda: lambda self: self._parse_lambda(),
    exp.Lateral: lambda self: self._parse_lateral(),
    exp.Limit: lambda self: self._parse_limit(),
    exp.Offset: lambda self: self._parse_offset(),
    exp.Order: lambda self: self._parse_order(),
    exp.Ordered: lambda self: self._parse_ordered(),
    exp.Properties: lambda self: self._parse_properties(),
    exp.Qualify: lambda self: self._parse_qualify(),
    exp.Returning: lambda self: self._parse_returning(),
    exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
    exp.Table: lambda self: self._parse_table_parts(),
    exp.TableAlias: lambda self: self._parse_table_alias(),
    exp.Where: lambda self: self._parse_where(),
    exp.Window: lambda self: self._parse_named_window(),
    exp.With: lambda self: self._parse_with(),
    "JOIN_TYPE": lambda self: self._parse_join_parts(),
}

# Top-level statement dispatch, keyed on the statement's first token.
STATEMENT_PARSERS = {
    TokenType.ALTER: lambda self: self._parse_alter(),
    TokenType.BEGIN: lambda self: self._parse_transaction(),
    TokenType.CACHE: lambda self: self._parse_cache(),
    TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
    TokenType.COMMENT: lambda self: self._parse_comment(),
    TokenType.CREATE: lambda self: self._parse_create(),
    TokenType.DELETE: lambda self: self._parse_delete(),
    TokenType.DESC: lambda self: self._parse_describe(),
    TokenType.DESCRIBE: lambda self: self._parse_describe(),
    TokenType.DROP: lambda self: self._parse_drop(),
    TokenType.END: lambda self: self._parse_commit_or_rollback(),
    # A bare FROM clause is sugar for SELECT * FROM ...
    TokenType.FROM: lambda self: exp.select("*").from_(
        t.cast(exp.From, self._parse_from(skip_from_token=True))
    ),
    TokenType.INSERT: lambda self: self._parse_insert(),
    TokenType.LOAD: lambda self: self._parse_load(),
    TokenType.MERGE: lambda self: self._parse_merge(),
    TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
    TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
    TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
    TokenType.SET: lambda self: self._parse_set(),
    TokenType.UNCACHE: lambda self: self._parse_uncache(),
    TokenType.UPDATE: lambda self: self._parse_update(),
    TokenType.USE: lambda self: self.expression(
        exp.Use,
        kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
        and exp.Var(this=self._prev.text),
        this=self._parse_table(schema=False),
    ),
}

UNARY_PARSERS = {
    TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
    TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
    TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
    TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
}

# Literal / primary expression dispatch; callbacks receive the matched token.
PRIMARY_PARSERS = {
    TokenType.STRING: lambda self, token: self.expression(
        exp.Literal, this=token.text, is_string=True
    ),
    TokenType.NUMBER: lambda self, token: self.expression(
        exp.Literal, this=token.text, is_string=False
    ),
    TokenType.STAR: lambda self, _: self.expression(
        exp.Star,
        **{"except": self._parse_except(), "replace": self._parse_replace()},
    ),
    TokenType.NULL: lambda self, _: self.expression(exp.Null),
    TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
    TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
    TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
    TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
    TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
    TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
    TokenType.NATIONAL_STRING: lambda self, token: self.expression(
        exp.National, this=token.text
    ),
    TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
    TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
}

PLACEHOLDER_PARSERS = {
    TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
    TokenType.PARAMETER: lambda self: self._parse_parameter(),
    # `:name` / `:1` style placeholders; only if followed by NUMBER or VAR.
    TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
    if self._match_set((TokenType.NUMBER, TokenType.VAR))
    else None,
}

# Range/predicate operators that take the already-parsed LHS (`this`).
RANGE_PARSERS = {
    TokenType.BETWEEN: lambda self, this: self._parse_between(this),
    TokenType.GLOB: binary_range_parser(exp.Glob),
    TokenType.ILIKE: binary_range_parser(exp.ILike),
    TokenType.IN: lambda self, this: self._parse_in(this),
    TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
    TokenType.IS: lambda self, this: self._parse_is(this),
    TokenType.LIKE: binary_range_parser(exp.Like),
    TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
    TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
    TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
}

# DDL property parsers, keyed on the property keyword text.
PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARACTER SET": lambda self: self._parse_character_set(),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER": lambda self: self._parse_cluster(),
    "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DEFINER": lambda self: self._parse_definer(),
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "RETURNS": lambda self: self._parse_returns(),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self.expression(
        exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
    ),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Column/table constraint parsers, keyed on the constraint keyword text.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint, this=self._parse_var()
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    "ON": lambda self: self._match(TokenType.UPDATE)
    and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
}

# ALTER TABLE action dispatch.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
}
695 696 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 697 698 NO_PAREN_FUNCTION_PARSERS = { 699 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 700 TokenType.CASE: lambda self: self._parse_case(), 701 TokenType.IF: lambda self: self._parse_if(), 702 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 703 exp.NextValueFor, 704 this=self._parse_column(), 705 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 706 ), 707 } 708 709 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 710 711 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 712 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 713 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 714 "DECODE": lambda self: self._parse_decode(), 715 "EXTRACT": lambda self: self._parse_extract(), 716 "JSON_OBJECT": lambda self: self._parse_json_object(), 717 "LOG": lambda self: self._parse_logarithm(), 718 "MATCH": lambda self: self._parse_match_against(), 719 "OPENJSON": lambda self: self._parse_open_json(), 720 "POSITION": lambda self: self._parse_position(), 721 "SAFE_CAST": lambda self: self._parse_cast(False), 722 "STRING_AGG": lambda self: self._parse_string_agg(), 723 "SUBSTRING": lambda self: self._parse_substring(), 724 "TRIM": lambda self: self._parse_trim(), 725 "TRY_CAST": lambda self: self._parse_cast(False), 726 "TRY_CONVERT": lambda self: self._parse_convert(False), 727 } 728 729 QUERY_MODIFIER_PARSERS = { 730 "joins": lambda self: list(iter(self._parse_join, None)), 731 "laterals": lambda self: list(iter(self._parse_lateral, None)), 732 "match": lambda self: self._parse_match_recognize(), 733 "where": lambda self: self._parse_where(), 734 "group": lambda self: self._parse_group(), 735 "having": lambda self: self._parse_having(), 736 "qualify": lambda self: self._parse_qualify(), 737 "windows": lambda self: self._parse_window_clause(), 738 "order": lambda self: self._parse_order(), 739 "limit": 
lambda self: self._parse_limit(), 740 "offset": lambda self: self._parse_offset(), 741 "locks": lambda self: self._parse_locks(), 742 "sample": lambda self: self._parse_table_sample(as_modifier=True), 743 } 744 745 SET_PARSERS = { 746 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 747 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 748 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 749 "TRANSACTION": lambda self: self._parse_set_transaction(), 750 } 751 752 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 753 754 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 755 756 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 757 758 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 759 760 TRANSACTION_CHARACTERISTICS = { 761 "ISOLATION LEVEL REPEATABLE READ", 762 "ISOLATION LEVEL READ COMMITTED", 763 "ISOLATION LEVEL READ UNCOMMITTED", 764 "ISOLATION LEVEL SERIALIZABLE", 765 "READ WRITE", 766 "READ ONLY", 767 } 768 769 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 770 771 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 772 773 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 774 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 775 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 776 777 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 778 779 STRICT_CAST = True 780 781 CONVERT_TYPE_FIRST = False 782 783 PREFIXED_PIVOT_COLUMNS = False 784 IDENTIFY_PIVOT_STRINGS = False 785 786 LOG_BASE_FIRST = True 787 LOG_DEFAULTS_TO_LN = False 788 789 __slots__ = ( 790 "error_level", 791 "error_message_context", 792 "sql", 793 "errors", 794 "index_offset", 795 "unnest_column_only", 796 "alias_post_tablesample", 797 "max_errors", 798 "null_ordering", 799 "_tokens", 800 "_index", 801 "_curr", 802 "_next", 803 "_prev", 804 "_prev_comments", 805 "_show_trie", 806 "_set_trie", 807 ) 808 809 def __init__( 810 self, 811 error_level: 
t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset() 827 828 def reset(self): 829 self.sql = "" 830 self.errors = [] 831 self._tokens = [] 832 self._index = 0 833 self._curr = None 834 self._next = None 835 self._prev = None 836 self._prev_comments = None 837 838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 ) 855 856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 
871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1] 889 890 def _parse( 891 self, 892 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 893 raw_tokens: t.List[Token], 894 sql: t.Optional[str] = None, 895 ) -> t.List[t.Optional[exp.Expression]]: 896 self.reset() 897 self.sql = sql or "" 898 total = len(raw_tokens) 899 chunks: t.List[t.List[Token]] = [[]] 900 901 for i, token in enumerate(raw_tokens): 902 if token.token_type == TokenType.SEMICOLON: 903 if i < total - 1: 904 chunks.append([]) 905 else: 906 chunks[-1].append(token) 907 908 expressions = [] 909 910 for tokens in chunks: 911 self._index = -1 912 self._tokens = tokens 913 self._advance() 914 915 expressions.append(parse_method(self)) 916 917 if self._index < len(self._tokens): 918 self.raise_error("Invalid expression / Unexpected token") 919 920 self.check_errors() 921 922 return expressions 923 924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 
927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 ) 936 937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end + 1 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error) 964 965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 
978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance 983 984 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 985 if expression and self._prev_comments: 986 expression.add_comments(self._prev_comments) 987 self._prev_comments = None 988 989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message) 1005 1006 def _find_sql(self, start: Token, end: Token) -> str: 1007 return self.sql[start.start : end.end + 1] 1008 1009 def _advance(self, times: int = 1) -> None: 1010 self._index += times 1011 self._curr = seq_get(self._tokens, self._index) 1012 self._next = seq_get(self._tokens, self._index + 1) 1013 if self._index > 0: 1014 self._prev = self._tokens[self._index - 1] 1015 self._prev_comments = self._prev.comments 1016 else: 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def _retreat(self, index: int) -> None: 1021 if index != self._index: 1022 self._advance(index - self._index) 1023 1024 def _parse_command(self) -> exp.Command: 1025 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1026 1027 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1028 start = self._prev 1029 exists = self._parse_exists() if allow_exists else None 1030 1031 self._match(TokenType.ON) 1032 1033 kind = self._match_set(self.CREATABLES) and self._prev 1034 1035 if not kind: 1036 
return self._parse_as_command(start) 1037 1038 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1039 this = self._parse_user_defined_function(kind=kind.token_type) 1040 elif kind.token_type == TokenType.TABLE: 1041 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1042 elif kind.token_type == TokenType.COLUMN: 1043 this = self._parse_column() 1044 else: 1045 this = self._parse_id_var() 1046 1047 self._match(TokenType.IS) 1048 1049 return self.expression( 1050 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1051 ) 1052 1053 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1054 def _parse_ttl(self) -> exp.Expression: 1055 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1056 this = self._parse_bitwise() 1057 1058 if self._match_text_seq("DELETE"): 1059 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1060 if self._match_text_seq("RECOMPRESS"): 1061 return self.expression( 1062 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1063 ) 1064 if self._match_text_seq("TO", "DISK"): 1065 return self.expression( 1066 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1067 ) 1068 if self._match_text_seq("TO", "VOLUME"): 1069 return self.expression( 1070 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1071 ) 1072 1073 return this 1074 1075 expressions = self._parse_csv(_parse_ttl_action) 1076 where = self._parse_where() 1077 group = self._parse_group() 1078 1079 aggregates = None 1080 if group and self._match(TokenType.SET): 1081 aggregates = self._parse_csv(self._parse_set_item) 1082 1083 return self.expression( 1084 exp.MergeTreeTTL, 1085 expressions=expressions, 1086 where=where, 1087 group=group, 1088 aggregates=aggregates, 1089 ) 1090 1091 def _parse_statement(self) -> t.Optional[exp.Expression]: 1092 if self._curr is None: 1093 return None 1094 1095 if 
self._match_set(self.STATEMENT_PARSERS): 1096 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1097 1098 if self._match_set(Tokenizer.COMMANDS): 1099 return self._parse_command() 1100 1101 expression = self._parse_expression() 1102 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1103 return self._parse_query_modifiers(expression) 1104 1105 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1106 start = self._prev 1107 temporary = self._match(TokenType.TEMPORARY) 1108 materialized = self._match_text_seq("MATERIALIZED") 1109 kind = self._match_set(self.CREATABLES) and self._prev.text 1110 if not kind: 1111 return self._parse_as_command(start) 1112 1113 return self.expression( 1114 exp.Drop, 1115 exists=self._parse_exists(), 1116 this=self._parse_table(schema=True), 1117 kind=kind, 1118 temporary=temporary, 1119 materialized=materialized, 1120 cascade=self._match_text_seq("CASCADE"), 1121 constraints=self._match_text_seq("CONSTRAINTS"), 1122 purge=self._match_text_seq("PURGE"), 1123 ) 1124 1125 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1126 return ( 1127 self._match(TokenType.IF) 1128 and (not not_ or self._match(TokenType.NOT)) 1129 and self._match(TokenType.EXISTS) 1130 ) 1131 1132 def _parse_create(self) -> t.Optional[exp.Expression]: 1133 start = self._prev 1134 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1135 TokenType.OR, TokenType.REPLACE 1136 ) 1137 unique = self._match(TokenType.UNIQUE) 1138 1139 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1140 self._match(TokenType.TABLE) 1141 1142 properties = None 1143 create_token = self._match_set(self.CREATABLES) and self._prev 1144 1145 if not create_token: 1146 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1147 create_token = self._match_set(self.CREATABLES) and self._prev 1148 1149 if not properties or not create_token: 1150 return 
self._parse_as_command(start) 1151 1152 exists = self._parse_exists(not_=True) 1153 this = None 1154 expression = None 1155 indexes = None 1156 no_schema_binding = None 1157 begin = None 1158 clone = None 1159 1160 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1161 this = self._parse_user_defined_function(kind=create_token.token_type) 1162 temp_properties = self._parse_properties() 1163 if properties and temp_properties: 1164 properties.expressions.extend(temp_properties.expressions) 1165 elif temp_properties: 1166 properties = temp_properties 1167 1168 self._match(TokenType.ALIAS) 1169 begin = self._match(TokenType.BEGIN) 1170 return_ = self._match_text_seq("RETURN") 1171 expression = self._parse_statement() 1172 1173 if return_: 1174 expression = self.expression(exp.Return, this=expression) 1175 elif create_token.token_type == TokenType.INDEX: 1176 this = self._parse_index(index=self._parse_id_var()) 1177 elif create_token.token_type in self.DB_CREATABLES: 1178 table_parts = self._parse_table_parts(schema=True) 1179 1180 # exp.Properties.Location.POST_NAME 1181 if self._match(TokenType.COMMA): 1182 temp_properties = self._parse_properties(before=True) 1183 if properties and temp_properties: 1184 properties.expressions.extend(temp_properties.expressions) 1185 elif temp_properties: 1186 properties = temp_properties 1187 1188 this = self._parse_schema(this=table_parts) 1189 1190 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1191 temp_properties = self._parse_properties() 1192 if properties and temp_properties: 1193 properties.expressions.extend(temp_properties.expressions) 1194 elif temp_properties: 1195 properties = temp_properties 1196 1197 self._match(TokenType.ALIAS) 1198 1199 # exp.Properties.Location.POST_ALIAS 1200 if not ( 1201 self._match(TokenType.SELECT, advance=False) 1202 or self._match(TokenType.WITH, advance=False) 1203 or self._match(TokenType.L_PAREN, advance=False) 1204 ): 1205 temp_properties = 
self._parse_properties() 1206 if properties and temp_properties: 1207 properties.expressions.extend(temp_properties.expressions) 1208 elif temp_properties: 1209 properties = temp_properties 1210 1211 expression = self._parse_ddl_select() 1212 1213 if create_token.token_type == TokenType.TABLE: 1214 indexes = [] 1215 while True: 1216 index = self._parse_index() 1217 1218 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1219 temp_properties = self._parse_properties() 1220 if properties and temp_properties: 1221 properties.expressions.extend(temp_properties.expressions) 1222 elif temp_properties: 1223 properties = temp_properties 1224 1225 if not index: 1226 break 1227 else: 1228 self._match(TokenType.COMMA) 1229 indexes.append(index) 1230 elif create_token.token_type == TokenType.VIEW: 1231 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1232 no_schema_binding = True 1233 1234 if self._match_text_seq("CLONE"): 1235 clone = self._parse_table(schema=True) 1236 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1237 clone_kind = ( 1238 self._match(TokenType.L_PAREN) 1239 and self._match_texts(self.CLONE_KINDS) 1240 and self._prev.text.upper() 1241 ) 1242 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1243 self._match(TokenType.R_PAREN) 1244 clone = self.expression( 1245 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1246 ) 1247 1248 return self.expression( 1249 exp.Create, 1250 this=this, 1251 kind=create_token.text, 1252 replace=replace, 1253 unique=unique, 1254 expression=expression, 1255 exists=exists, 1256 properties=properties, 1257 indexes=indexes, 1258 no_schema_binding=no_schema_binding, 1259 begin=begin, 1260 clone=clone, 1261 ) 1262 1263 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1264 # only used for teradata currently 1265 self._match(TokenType.COMMA) 1266 1267 kwargs = { 1268 "no": self._match_text_seq("NO"), 1269 
"dual": self._match_text_seq("DUAL"), 1270 "before": self._match_text_seq("BEFORE"), 1271 "default": self._match_text_seq("DEFAULT"), 1272 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1273 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1274 "after": self._match_text_seq("AFTER"), 1275 "minimum": self._match_texts(("MIN", "MINIMUM")), 1276 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1277 } 1278 1279 if self._match_texts(self.PROPERTY_PARSERS): 1280 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1281 try: 1282 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1283 except TypeError: 1284 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1285 1286 return None 1287 1288 def _parse_property(self) -> t.Optional[exp.Expression]: 1289 if self._match_texts(self.PROPERTY_PARSERS): 1290 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1291 1292 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1293 return self._parse_character_set(default=True) 1294 1295 if self._match_text_seq("COMPOUND", "SORTKEY"): 1296 return self._parse_sortkey(compound=True) 1297 1298 if self._match_text_seq("SQL", "SECURITY"): 1299 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1300 1301 assignment = self._match_pair( 1302 TokenType.VAR, TokenType.EQ, advance=False 1303 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1304 1305 if assignment: 1306 key = self._parse_var_or_string() 1307 self._match(TokenType.EQ) 1308 return self.expression(exp.Property, this=key, value=self._parse_column()) 1309 1310 return None 1311 1312 def _parse_stored(self) -> exp.Expression: 1313 self._match(TokenType.ALIAS) 1314 1315 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1316 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1317 1318 return self.expression( 1319 exp.FileFormatProperty, 1320 
this=self.expression( 1321 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1322 ) 1323 if input_format or output_format 1324 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1325 ) 1326 1327 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1328 self._match(TokenType.EQ) 1329 self._match(TokenType.ALIAS) 1330 return self.expression(exp_class, this=self._parse_field()) 1331 1332 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1333 properties = [] 1334 1335 while True: 1336 if before: 1337 prop = self._parse_property_before() 1338 else: 1339 prop = self._parse_property() 1340 1341 if not prop: 1342 break 1343 for p in ensure_list(prop): 1344 properties.append(p) 1345 1346 if properties: 1347 return self.expression(exp.Properties, expressions=properties) 1348 1349 return None 1350 1351 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1352 return self.expression( 1353 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1354 ) 1355 1356 def _parse_volatile_property(self) -> exp.Expression: 1357 if self._index >= 2: 1358 pre_volatile_token = self._tokens[self._index - 2] 1359 else: 1360 pre_volatile_token = None 1361 1362 if pre_volatile_token and pre_volatile_token.token_type in ( 1363 TokenType.CREATE, 1364 TokenType.REPLACE, 1365 TokenType.UNIQUE, 1366 ): 1367 return exp.VolatileProperty() 1368 1369 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1370 1371 def _parse_with_property( 1372 self, 1373 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1374 self._match(TokenType.WITH) 1375 if self._match(TokenType.L_PAREN, advance=False): 1376 return self._parse_wrapped_csv(self._parse_property) 1377 1378 if self._match_text_seq("JOURNAL"): 1379 return self._parse_withjournaltable() 1380 1381 if self._match_text_seq("DATA"): 1382 
return self._parse_withdata(no=False) 1383 elif self._match_text_seq("NO", "DATA"): 1384 return self._parse_withdata(no=True) 1385 1386 if not self._next: 1387 return None 1388 1389 return self._parse_withisolatedloading() 1390 1391 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1392 def _parse_definer(self) -> t.Optional[exp.Expression]: 1393 self._match(TokenType.EQ) 1394 1395 user = self._parse_id_var() 1396 self._match(TokenType.PARAMETER) 1397 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1398 1399 if not user or not host: 1400 return None 1401 1402 return exp.DefinerProperty(this=f"{user}@{host}") 1403 1404 def _parse_withjournaltable(self) -> exp.Expression: 1405 self._match(TokenType.TABLE) 1406 self._match(TokenType.EQ) 1407 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1408 1409 def _parse_log(self, no: bool = False) -> exp.Expression: 1410 return self.expression(exp.LogProperty, no=no) 1411 1412 def _parse_journal(self, **kwargs) -> exp.Expression: 1413 return self.expression(exp.JournalProperty, **kwargs) 1414 1415 def _parse_checksum(self) -> exp.Expression: 1416 self._match(TokenType.EQ) 1417 1418 on = None 1419 if self._match(TokenType.ON): 1420 on = True 1421 elif self._match_text_seq("OFF"): 1422 on = False 1423 default = self._match(TokenType.DEFAULT) 1424 1425 return self.expression( 1426 exp.ChecksumProperty, 1427 on=on, 1428 default=default, 1429 ) 1430 1431 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1432 if not self._match_text_seq("BY"): 1433 self._retreat(self._index - 1) 1434 return None 1435 return self.expression( 1436 exp.Cluster, 1437 expressions=self._parse_csv(self._parse_ordered), 1438 ) 1439 1440 def _parse_freespace(self) -> exp.Expression: 1441 self._match(TokenType.EQ) 1442 return self.expression( 1443 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1444 ) 1445 1446 def 
_parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1447 if self._match(TokenType.EQ): 1448 return self.expression( 1449 exp.MergeBlockRatioProperty, 1450 this=self._parse_number(), 1451 percent=self._match(TokenType.PERCENT), 1452 ) 1453 return self.expression( 1454 exp.MergeBlockRatioProperty, 1455 no=no, 1456 default=default, 1457 ) 1458 1459 def _parse_datablocksize( 1460 self, 1461 default: t.Optional[bool] = None, 1462 minimum: t.Optional[bool] = None, 1463 maximum: t.Optional[bool] = None, 1464 ) -> exp.Expression: 1465 self._match(TokenType.EQ) 1466 size = self._parse_number() 1467 units = None 1468 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1469 units = self._prev.text 1470 return self.expression( 1471 exp.DataBlocksizeProperty, 1472 size=size, 1473 units=units, 1474 default=default, 1475 minimum=minimum, 1476 maximum=maximum, 1477 ) 1478 1479 def _parse_blockcompression(self) -> exp.Expression: 1480 self._match(TokenType.EQ) 1481 always = self._match_text_seq("ALWAYS") 1482 manual = self._match_text_seq("MANUAL") 1483 never = self._match_text_seq("NEVER") 1484 default = self._match_text_seq("DEFAULT") 1485 autotemp = None 1486 if self._match_text_seq("AUTOTEMP"): 1487 autotemp = self._parse_schema() 1488 1489 return self.expression( 1490 exp.BlockCompressionProperty, 1491 always=always, 1492 manual=manual, 1493 never=never, 1494 default=default, 1495 autotemp=autotemp, 1496 ) 1497 1498 def _parse_withisolatedloading(self) -> exp.Expression: 1499 no = self._match_text_seq("NO") 1500 concurrent = self._match_text_seq("CONCURRENT") 1501 self._match_text_seq("ISOLATED", "LOADING") 1502 for_all = self._match_text_seq("FOR", "ALL") 1503 for_insert = self._match_text_seq("FOR", "INSERT") 1504 for_none = self._match_text_seq("FOR", "NONE") 1505 return self.expression( 1506 exp.IsolatedLoadingProperty, 1507 no=no, 1508 concurrent=concurrent, 1509 for_all=for_all, 1510 for_insert=for_insert, 1511 for_none=for_none, 
1512 ) 1513 1514 def _parse_locking(self) -> exp.Expression: 1515 if self._match(TokenType.TABLE): 1516 kind = "TABLE" 1517 elif self._match(TokenType.VIEW): 1518 kind = "VIEW" 1519 elif self._match(TokenType.ROW): 1520 kind = "ROW" 1521 elif self._match_text_seq("DATABASE"): 1522 kind = "DATABASE" 1523 else: 1524 kind = None 1525 1526 if kind in ("DATABASE", "TABLE", "VIEW"): 1527 this = self._parse_table_parts() 1528 else: 1529 this = None 1530 1531 if self._match(TokenType.FOR): 1532 for_or_in = "FOR" 1533 elif self._match(TokenType.IN): 1534 for_or_in = "IN" 1535 else: 1536 for_or_in = None 1537 1538 if self._match_text_seq("ACCESS"): 1539 lock_type = "ACCESS" 1540 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1541 lock_type = "EXCLUSIVE" 1542 elif self._match_text_seq("SHARE"): 1543 lock_type = "SHARE" 1544 elif self._match_text_seq("READ"): 1545 lock_type = "READ" 1546 elif self._match_text_seq("WRITE"): 1547 lock_type = "WRITE" 1548 elif self._match_text_seq("CHECKSUM"): 1549 lock_type = "CHECKSUM" 1550 else: 1551 lock_type = None 1552 1553 override = self._match_text_seq("OVERRIDE") 1554 1555 return self.expression( 1556 exp.LockingProperty, 1557 this=this, 1558 kind=kind, 1559 for_or_in=for_or_in, 1560 lock_type=lock_type, 1561 override=override, 1562 ) 1563 1564 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1565 if self._match(TokenType.PARTITION_BY): 1566 return self._parse_csv(self._parse_conjunction) 1567 return [] 1568 1569 def _parse_partitioned_by(self) -> exp.Expression: 1570 self._match(TokenType.EQ) 1571 return self.expression( 1572 exp.PartitionedByProperty, 1573 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1574 ) 1575 1576 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1577 if self._match_text_seq("AND", "STATISTICS"): 1578 statistics = True 1579 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1580 statistics = False 1581 else: 1582 statistics = None 1583 1584 return 
self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1585 1586 def _parse_no_property(self) -> t.Optional[exp.Property]: 1587 if self._match_text_seq("PRIMARY", "INDEX"): 1588 return exp.NoPrimaryIndexProperty() 1589 return None 1590 1591 def _parse_on_property(self) -> t.Optional[exp.Property]: 1592 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1593 return exp.OnCommitProperty() 1594 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1595 return exp.OnCommitProperty(delete=True) 1596 return None 1597 1598 def _parse_distkey(self) -> exp.Expression: 1599 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1600 1601 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1602 table = self._parse_table(schema=True) 1603 options = [] 1604 while self._match_texts(("INCLUDING", "EXCLUDING")): 1605 this = self._prev.text.upper() 1606 id_var = self._parse_id_var() 1607 1608 if not id_var: 1609 return None 1610 1611 options.append( 1612 self.expression( 1613 exp.Property, 1614 this=this, 1615 value=exp.Var(this=id_var.this.upper()), 1616 ) 1617 ) 1618 return self.expression(exp.LikeProperty, this=table, expressions=options) 1619 1620 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1621 return self.expression( 1622 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1623 ) 1624 1625 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1626 self._match(TokenType.EQ) 1627 return self.expression( 1628 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1629 ) 1630 1631 def _parse_returns(self) -> exp.Expression: 1632 value: t.Optional[exp.Expression] 1633 is_table = self._match(TokenType.TABLE) 1634 1635 if is_table: 1636 if self._match(TokenType.LT): 1637 value = self.expression( 1638 exp.Schema, 1639 this="TABLE", 1640 expressions=self._parse_csv(self._parse_struct_types), 1641 ) 1642 if not 
self._match(TokenType.GT): 1643 self.raise_error("Expecting >") 1644 else: 1645 value = self._parse_schema(exp.Var(this="TABLE")) 1646 else: 1647 value = self._parse_types() 1648 1649 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1650 1651 def _parse_describe(self) -> exp.Expression: 1652 kind = self._match_set(self.CREATABLES) and self._prev.text 1653 this = self._parse_table() 1654 1655 return self.expression(exp.Describe, this=this, kind=kind) 1656 1657 def _parse_insert(self) -> exp.Expression: 1658 overwrite = self._match(TokenType.OVERWRITE) 1659 local = self._match_text_seq("LOCAL") 1660 alternative = None 1661 1662 if self._match_text_seq("DIRECTORY"): 1663 this: t.Optional[exp.Expression] = self.expression( 1664 exp.Directory, 1665 this=self._parse_var_or_string(), 1666 local=local, 1667 row_format=self._parse_row_format(match_row=True), 1668 ) 1669 else: 1670 if self._match(TokenType.OR): 1671 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1672 1673 self._match(TokenType.INTO) 1674 self._match(TokenType.TABLE) 1675 this = self._parse_table(schema=True) 1676 1677 return self.expression( 1678 exp.Insert, 1679 this=this, 1680 exists=self._parse_exists(), 1681 partition=self._parse_partition(), 1682 expression=self._parse_ddl_select(), 1683 conflict=self._parse_on_conflict(), 1684 returning=self._parse_returning(), 1685 overwrite=overwrite, 1686 alternative=alternative, 1687 ) 1688 1689 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1690 conflict = self._match_text_seq("ON", "CONFLICT") 1691 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1692 1693 if not (conflict or duplicate): 1694 return None 1695 1696 nothing = None 1697 expressions = None 1698 key = None 1699 constraint = None 1700 1701 if conflict: 1702 if self._match_text_seq("ON", "CONSTRAINT"): 1703 constraint = self._parse_id_var() 1704 else: 1705 key = self._parse_csv(self._parse_value) 1706 1707 
self._match_text_seq("DO") 1708 if self._match_text_seq("NOTHING"): 1709 nothing = True 1710 else: 1711 self._match(TokenType.UPDATE) 1712 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1713 1714 return self.expression( 1715 exp.OnConflict, 1716 duplicate=duplicate, 1717 expressions=expressions, 1718 nothing=nothing, 1719 key=key, 1720 constraint=constraint, 1721 ) 1722 1723 def _parse_returning(self) -> t.Optional[exp.Expression]: 1724 if not self._match(TokenType.RETURNING): 1725 return None 1726 1727 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1728 1729 def _parse_row(self) -> t.Optional[exp.Expression]: 1730 if not self._match(TokenType.FORMAT): 1731 return None 1732 return self._parse_row_format() 1733 1734 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1735 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1736 return None 1737 1738 if self._match_text_seq("SERDE"): 1739 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1740 1741 self._match_text_seq("DELIMITED") 1742 1743 kwargs = {} 1744 1745 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1746 kwargs["fields"] = self._parse_string() 1747 if self._match_text_seq("ESCAPED", "BY"): 1748 kwargs["escaped"] = self._parse_string() 1749 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1750 kwargs["collection_items"] = self._parse_string() 1751 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1752 kwargs["map_keys"] = self._parse_string() 1753 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1754 kwargs["lines"] = self._parse_string() 1755 if self._match_text_seq("NULL", "DEFINED", "AS"): 1756 kwargs["null"] = self._parse_string() 1757 1758 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1759 1760 def _parse_load(self) -> exp.Expression: 1761 if self._match_text_seq("DATA"): 1762 
local = self._match_text_seq("LOCAL") 1763 self._match_text_seq("INPATH") 1764 inpath = self._parse_string() 1765 overwrite = self._match(TokenType.OVERWRITE) 1766 self._match_pair(TokenType.INTO, TokenType.TABLE) 1767 1768 return self.expression( 1769 exp.LoadData, 1770 this=self._parse_table(schema=True), 1771 local=local, 1772 overwrite=overwrite, 1773 inpath=inpath, 1774 partition=self._parse_partition(), 1775 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1776 serde=self._match_text_seq("SERDE") and self._parse_string(), 1777 ) 1778 return self._parse_as_command(self._prev) 1779 1780 def _parse_delete(self) -> exp.Expression: 1781 self._match(TokenType.FROM) 1782 1783 return self.expression( 1784 exp.Delete, 1785 this=self._parse_table(), 1786 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1787 where=self._parse_where(), 1788 returning=self._parse_returning(), 1789 ) 1790 1791 def _parse_update(self) -> exp.Expression: 1792 return self.expression( 1793 exp.Update, 1794 **{ # type: ignore 1795 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1796 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1797 "from": self._parse_from(modifiers=True), 1798 "where": self._parse_where(), 1799 "returning": self._parse_returning(), 1800 }, 1801 ) 1802 1803 def _parse_uncache(self) -> exp.Expression: 1804 if not self._match(TokenType.TABLE): 1805 self.raise_error("Expecting TABLE after UNCACHE") 1806 1807 return self.expression( 1808 exp.Uncache, 1809 exists=self._parse_exists(), 1810 this=self._parse_table(schema=True), 1811 ) 1812 1813 def _parse_cache(self) -> exp.Expression: 1814 lazy = self._match_text_seq("LAZY") 1815 self._match(TokenType.TABLE) 1816 table = self._parse_table(schema=True) 1817 options = [] 1818 1819 if self._match_text_seq("OPTIONS"): 1820 self._match_l_paren() 1821 k = self._parse_string() 1822 self._match(TokenType.EQ) 1823 v = 
self._parse_string() 1824 options = [k, v] 1825 self._match_r_paren() 1826 1827 self._match(TokenType.ALIAS) 1828 return self.expression( 1829 exp.Cache, 1830 this=table, 1831 lazy=lazy, 1832 options=options, 1833 expression=self._parse_select(nested=True), 1834 ) 1835 1836 def _parse_partition(self) -> t.Optional[exp.Expression]: 1837 if not self._match(TokenType.PARTITION): 1838 return None 1839 1840 return self.expression( 1841 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1842 ) 1843 1844 def _parse_value(self) -> exp.Expression: 1845 if self._match(TokenType.L_PAREN): 1846 expressions = self._parse_csv(self._parse_conjunction) 1847 self._match_r_paren() 1848 return self.expression(exp.Tuple, expressions=expressions) 1849 1850 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1851 # Source: https://prestodb.io/docs/current/sql/values.html 1852 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1853 1854 def _parse_select( 1855 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1856 ) -> t.Optional[exp.Expression]: 1857 cte = self._parse_with() 1858 if cte: 1859 this = self._parse_statement() 1860 1861 if not this: 1862 self.raise_error("Failed to parse any statement following CTE") 1863 return cte 1864 1865 if "with" in this.arg_types: 1866 this.set("with", cte) 1867 else: 1868 self.raise_error(f"{this.key} does not support CTE") 1869 this = cte 1870 elif self._match(TokenType.SELECT): 1871 comments = self._prev_comments 1872 1873 hint = self._parse_hint() 1874 all_ = self._match(TokenType.ALL) 1875 distinct = self._match(TokenType.DISTINCT) 1876 1877 kind = ( 1878 self._match(TokenType.ALIAS) 1879 and self._match_texts(("STRUCT", "VALUE")) 1880 and self._prev.text 1881 ) 1882 1883 if distinct: 1884 distinct = self.expression( 1885 exp.Distinct, 1886 on=self._parse_value() if self._match(TokenType.ON) else None, 1887 ) 1888 1889 if all_ and distinct: 1890 
self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1891 1892 limit = self._parse_limit(top=True) 1893 expressions = self._parse_csv(self._parse_expression) 1894 1895 this = self.expression( 1896 exp.Select, 1897 kind=kind, 1898 hint=hint, 1899 distinct=distinct, 1900 expressions=expressions, 1901 limit=limit, 1902 ) 1903 this.comments = comments 1904 1905 into = self._parse_into() 1906 if into: 1907 this.set("into", into) 1908 1909 from_ = self._parse_from() 1910 if from_: 1911 this.set("from", from_) 1912 1913 this = self._parse_query_modifiers(this) 1914 elif (table or nested) and self._match(TokenType.L_PAREN): 1915 if self._match(TokenType.PIVOT): 1916 this = self._parse_simplified_pivot() 1917 elif self._match(TokenType.FROM): 1918 this = exp.select("*").from_( 1919 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1920 ) 1921 else: 1922 this = self._parse_table() if table else self._parse_select(nested=True) 1923 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1924 1925 self._match_r_paren() 1926 1927 # early return so that subquery unions aren't parsed again 1928 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1929 # Union ALL should be a property of the top select node, not the subquery 1930 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1931 elif self._match(TokenType.VALUES): 1932 this = self.expression( 1933 exp.Values, 1934 expressions=self._parse_csv(self._parse_value), 1935 alias=self._parse_table_alias(), 1936 ) 1937 else: 1938 this = None 1939 1940 return self._parse_set_operations(this) 1941 1942 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1943 if not skip_with_token and not self._match(TokenType.WITH): 1944 return None 1945 1946 comments = self._prev_comments 1947 recursive = self._match(TokenType.RECURSIVE) 1948 1949 expressions = [] 1950 while True: 1951 expressions.append(self._parse_cte()) 1952 1953 if not self._match(TokenType.COMMA) and 
not self._match(TokenType.WITH): 1954 break 1955 else: 1956 self._match(TokenType.WITH) 1957 1958 return self.expression( 1959 exp.With, comments=comments, expressions=expressions, recursive=recursive 1960 ) 1961 1962 def _parse_cte(self) -> exp.Expression: 1963 alias = self._parse_table_alias() 1964 if not alias or not alias.this: 1965 self.raise_error("Expected CTE to have alias") 1966 1967 self._match(TokenType.ALIAS) 1968 1969 return self.expression( 1970 exp.CTE, 1971 this=self._parse_wrapped(self._parse_statement), 1972 alias=alias, 1973 ) 1974 1975 def _parse_table_alias( 1976 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1977 ) -> t.Optional[exp.Expression]: 1978 any_token = self._match(TokenType.ALIAS) 1979 alias = ( 1980 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1981 or self._parse_string_as_identifier() 1982 ) 1983 1984 index = self._index 1985 if self._match(TokenType.L_PAREN): 1986 columns = self._parse_csv(self._parse_function_parameter) 1987 self._match_r_paren() if columns else self._retreat(index) 1988 else: 1989 columns = None 1990 1991 if not alias and not columns: 1992 return None 1993 1994 return self.expression(exp.TableAlias, this=alias, columns=columns) 1995 1996 def _parse_subquery( 1997 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1998 ) -> t.Optional[exp.Expression]: 1999 if not this: 2000 return None 2001 return self.expression( 2002 exp.Subquery, 2003 this=this, 2004 pivots=self._parse_pivots(), 2005 alias=self._parse_table_alias() if parse_alias else None, 2006 ) 2007 2008 def _parse_query_modifiers( 2009 self, this: t.Optional[exp.Expression] 2010 ) -> t.Optional[exp.Expression]: 2011 if isinstance(this, self.MODIFIABLES): 2012 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2013 expression = parser(self) 2014 2015 if expression: 2016 this.set(key, expression) 2017 return this 2018 2019 def _parse_hint(self) -> t.Optional[exp.Expression]: 2020 
if self._match(TokenType.HINT): 2021 hints = self._parse_csv(self._parse_function) 2022 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2023 self.raise_error("Expected */ after HINT") 2024 return self.expression(exp.Hint, expressions=hints) 2025 2026 return None 2027 2028 def _parse_into(self) -> t.Optional[exp.Expression]: 2029 if not self._match(TokenType.INTO): 2030 return None 2031 2032 temp = self._match(TokenType.TEMPORARY) 2033 unlogged = self._match_text_seq("UNLOGGED") 2034 self._match(TokenType.TABLE) 2035 2036 return self.expression( 2037 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2038 ) 2039 2040 def _parse_from( 2041 self, modifiers: bool = False, skip_from_token: bool = False 2042 ) -> t.Optional[exp.From]: 2043 if not skip_from_token and not self._match(TokenType.FROM): 2044 return None 2045 2046 comments = self._prev_comments 2047 this = self._parse_table() 2048 2049 return self.expression( 2050 exp.From, 2051 comments=comments, 2052 this=self._parse_query_modifiers(this) if modifiers else this, 2053 ) 2054 2055 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2056 if not self._match(TokenType.MATCH_RECOGNIZE): 2057 return None 2058 2059 self._match_l_paren() 2060 2061 partition = self._parse_partition_by() 2062 order = self._parse_order() 2063 measures = ( 2064 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2065 ) 2066 2067 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2068 rows = exp.Var(this="ONE ROW PER MATCH") 2069 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2070 text = "ALL ROWS PER MATCH" 2071 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2072 text += f" SHOW EMPTY MATCHES" 2073 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2074 text += f" OMIT EMPTY MATCHES" 2075 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2076 text += f" WITH UNMATCHED ROWS" 2077 rows = exp.Var(this=text) 2078 else: 2079 
rows = None 2080 2081 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2082 text = "AFTER MATCH SKIP" 2083 if self._match_text_seq("PAST", "LAST", "ROW"): 2084 text += f" PAST LAST ROW" 2085 elif self._match_text_seq("TO", "NEXT", "ROW"): 2086 text += f" TO NEXT ROW" 2087 elif self._match_text_seq("TO", "FIRST"): 2088 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2089 elif self._match_text_seq("TO", "LAST"): 2090 text += f" TO LAST {self._advance_any().text}" # type: ignore 2091 after = exp.Var(this=text) 2092 else: 2093 after = None 2094 2095 if self._match_text_seq("PATTERN"): 2096 self._match_l_paren() 2097 2098 if not self._curr: 2099 self.raise_error("Expecting )", self._curr) 2100 2101 paren = 1 2102 start = self._curr 2103 2104 while self._curr and paren > 0: 2105 if self._curr.token_type == TokenType.L_PAREN: 2106 paren += 1 2107 if self._curr.token_type == TokenType.R_PAREN: 2108 paren -= 1 2109 end = self._prev 2110 self._advance() 2111 if paren > 0: 2112 self.raise_error("Expecting )", self._curr) 2113 pattern = exp.Var(this=self._find_sql(start, end)) 2114 else: 2115 pattern = None 2116 2117 define = ( 2118 self._parse_csv( 2119 lambda: self.expression( 2120 exp.Alias, 2121 alias=self._parse_id_var(any_token=True), 2122 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2123 ) 2124 ) 2125 if self._match_text_seq("DEFINE") 2126 else None 2127 ) 2128 2129 self._match_r_paren() 2130 2131 return self.expression( 2132 exp.MatchRecognize, 2133 partition_by=partition, 2134 order=order, 2135 measures=measures, 2136 rows=rows, 2137 after=after, 2138 pattern=pattern, 2139 define=define, 2140 alias=self._parse_table_alias(), 2141 ) 2142 2143 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2144 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2145 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2146 2147 if outer_apply or cross_apply: 2148 this = self._parse_select(table=True) 2149 view = 
None 2150 outer = not cross_apply 2151 elif self._match(TokenType.LATERAL): 2152 this = self._parse_select(table=True) 2153 view = self._match(TokenType.VIEW) 2154 outer = self._match(TokenType.OUTER) 2155 else: 2156 return None 2157 2158 if not this: 2159 this = self._parse_function() or self._parse_id_var(any_token=False) 2160 while self._match(TokenType.DOT): 2161 this = exp.Dot( 2162 this=this, 2163 expression=self._parse_function() or self._parse_id_var(any_token=False), 2164 ) 2165 2166 table_alias: t.Optional[exp.Expression] 2167 2168 if view: 2169 table = self._parse_id_var(any_token=False) 2170 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2171 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2172 else: 2173 table_alias = self._parse_table_alias() 2174 2175 expression = self.expression( 2176 exp.Lateral, 2177 this=this, 2178 view=view, 2179 outer=outer, 2180 alias=table_alias, 2181 ) 2182 2183 return expression 2184 2185 def _parse_join_parts( 2186 self, 2187 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2188 return ( 2189 self._match_set(self.JOIN_METHODS) and self._prev, 2190 self._match_set(self.JOIN_SIDES) and self._prev, 2191 self._match_set(self.JOIN_KINDS) and self._prev, 2192 ) 2193 2194 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2195 if self._match(TokenType.COMMA): 2196 return self.expression(exp.Join, this=self._parse_table()) 2197 2198 index = self._index 2199 method, side, kind = self._parse_join_parts() 2200 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2201 join = self._match(TokenType.JOIN) 2202 2203 if not skip_join_token and not join: 2204 self._retreat(index) 2205 kind = None 2206 method = None 2207 side = None 2208 2209 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2210 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2211 2212 if not 
skip_join_token and not join and not outer_apply and not cross_apply: 2213 return None 2214 2215 if outer_apply: 2216 side = Token(TokenType.LEFT, "LEFT") 2217 2218 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2219 2220 if method: 2221 kwargs["method"] = method.text 2222 if side: 2223 kwargs["side"] = side.text 2224 if kind: 2225 kwargs["kind"] = kind.text 2226 if hint: 2227 kwargs["hint"] = hint 2228 2229 if self._match(TokenType.ON): 2230 kwargs["on"] = self._parse_conjunction() 2231 elif self._match(TokenType.USING): 2232 kwargs["using"] = self._parse_wrapped_id_vars() 2233 2234 return self.expression(exp.Join, **kwargs) 2235 2236 def _parse_index( 2237 self, 2238 index: t.Optional[exp.Expression] = None, 2239 ) -> t.Optional[exp.Expression]: 2240 if index: 2241 unique = None 2242 primary = None 2243 amp = None 2244 2245 self._match(TokenType.ON) 2246 self._match(TokenType.TABLE) # hive 2247 table = self._parse_table_parts(schema=True) 2248 else: 2249 unique = self._match(TokenType.UNIQUE) 2250 primary = self._match_text_seq("PRIMARY") 2251 amp = self._match_text_seq("AMP") 2252 if not self._match(TokenType.INDEX): 2253 return None 2254 index = self._parse_id_var() 2255 table = None 2256 2257 if self._match(TokenType.L_PAREN, advance=False): 2258 columns = self._parse_wrapped_csv(self._parse_ordered) 2259 else: 2260 columns = None 2261 2262 return self.expression( 2263 exp.Index, 2264 this=index, 2265 table=table, 2266 columns=columns, 2267 unique=unique, 2268 primary=primary, 2269 amp=amp, 2270 partition_by=self._parse_partition_by(), 2271 ) 2272 2273 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2274 return ( 2275 (not schema and self._parse_function()) 2276 or self._parse_id_var(any_token=False) 2277 or self._parse_string_as_identifier() 2278 or self._parse_placeholder() 2279 ) 2280 2281 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2282 catalog = None 2283 db = None 2284 table = 
self._parse_table_part(schema=schema) 2285 2286 while self._match(TokenType.DOT): 2287 if catalog: 2288 # This allows nesting the table in arbitrarily many dot expressions if needed 2289 table = self.expression( 2290 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2291 ) 2292 else: 2293 catalog = db 2294 db = table 2295 table = self._parse_table_part(schema=schema) 2296 2297 if not table: 2298 self.raise_error(f"Expected table name but got {self._curr}") 2299 2300 return self.expression( 2301 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2302 ) 2303 2304 def _parse_table( 2305 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2306 ) -> t.Optional[exp.Expression]: 2307 lateral = self._parse_lateral() 2308 if lateral: 2309 return lateral 2310 2311 unnest = self._parse_unnest() 2312 if unnest: 2313 return unnest 2314 2315 values = self._parse_derived_table_values() 2316 if values: 2317 return values 2318 2319 subquery = self._parse_select(table=True) 2320 if subquery: 2321 if not subquery.args.get("pivots"): 2322 subquery.set("pivots", self._parse_pivots()) 2323 return subquery 2324 2325 this: exp.Expression = self._parse_table_parts(schema=schema) 2326 2327 if schema: 2328 return self._parse_schema(this=this) 2329 2330 if self.alias_post_tablesample: 2331 table_sample = self._parse_table_sample() 2332 2333 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2334 if alias: 2335 this.set("alias", alias) 2336 2337 if not this.args.get("pivots"): 2338 this.set("pivots", self._parse_pivots()) 2339 2340 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2341 this.set( 2342 "hints", 2343 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2344 ) 2345 self._match_r_paren() 2346 2347 if not self.alias_post_tablesample: 2348 table_sample = self._parse_table_sample() 2349 2350 if table_sample: 2351 table_sample.set("this", 
this) 2352 this = table_sample 2353 2354 return this 2355 2356 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2357 if not self._match(TokenType.UNNEST): 2358 return None 2359 2360 expressions = self._parse_wrapped_csv(self._parse_type) 2361 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2362 alias = self._parse_table_alias() 2363 2364 if alias and self.unnest_column_only: 2365 if alias.args.get("columns"): 2366 self.raise_error("Unexpected extra column alias in unnest.") 2367 alias.set("columns", [alias.this]) 2368 alias.set("this", None) 2369 2370 offset = None 2371 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2372 self._match(TokenType.ALIAS) 2373 offset = self._parse_id_var() or exp.Identifier(this="offset") 2374 2375 return self.expression( 2376 exp.Unnest, 2377 expressions=expressions, 2378 ordinality=ordinality, 2379 alias=alias, 2380 offset=offset, 2381 ) 2382 2383 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2384 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2385 if not is_derived and not self._match(TokenType.VALUES): 2386 return None 2387 2388 expressions = self._parse_csv(self._parse_value) 2389 2390 if is_derived: 2391 self._match_r_paren() 2392 2393 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2394 2395 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2396 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2397 as_modifier and self._match_text_seq("USING", "SAMPLE") 2398 ): 2399 return None 2400 2401 bucket_numerator = None 2402 bucket_denominator = None 2403 bucket_field = None 2404 percent = None 2405 rows = None 2406 size = None 2407 seed = None 2408 2409 kind = ( 2410 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2411 ) 2412 method = self._parse_var(tokens=(TokenType.ROW,)) 2413 2414 self._match(TokenType.L_PAREN) 2415 2416 num = 
self._parse_number() 2417 2418 if self._match_text_seq("BUCKET"): 2419 bucket_numerator = self._parse_number() 2420 self._match_text_seq("OUT", "OF") 2421 bucket_denominator = bucket_denominator = self._parse_number() 2422 self._match(TokenType.ON) 2423 bucket_field = self._parse_field() 2424 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2425 percent = num 2426 elif self._match(TokenType.ROWS): 2427 rows = num 2428 else: 2429 size = num 2430 2431 self._match(TokenType.R_PAREN) 2432 2433 if self._match(TokenType.L_PAREN): 2434 method = self._parse_var() 2435 seed = self._match(TokenType.COMMA) and self._parse_number() 2436 self._match_r_paren() 2437 elif self._match_texts(("SEED", "REPEATABLE")): 2438 seed = self._parse_wrapped(self._parse_number) 2439 2440 return self.expression( 2441 exp.TableSample, 2442 method=method, 2443 bucket_numerator=bucket_numerator, 2444 bucket_denominator=bucket_denominator, 2445 bucket_field=bucket_field, 2446 percent=percent, 2447 rows=rows, 2448 size=size, 2449 seed=seed, 2450 kind=kind, 2451 ) 2452 2453 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2454 return list(iter(self._parse_pivot, None)) 2455 2456 # https://duckdb.org/docs/sql/statements/pivot 2457 def _parse_simplified_pivot(self) -> exp.Pivot: 2458 def _parse_on() -> t.Optional[exp.Expression]: 2459 this = self._parse_bitwise() 2460 return self._parse_in(this) if self._match(TokenType.IN) else this 2461 2462 this = self._parse_table() 2463 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2464 using = self._match(TokenType.USING) and self._parse_csv( 2465 lambda: self._parse_alias(self._parse_function()) 2466 ) 2467 group = self._parse_group() 2468 return self.expression( 2469 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2470 ) 2471 2472 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2473 index = self._index 2474 2475 if self._match(TokenType.PIVOT): 2476 unpivot = False 2477 elif 
self._match(TokenType.UNPIVOT): 2478 unpivot = True 2479 else: 2480 return None 2481 2482 expressions = [] 2483 field = None 2484 2485 if not self._match(TokenType.L_PAREN): 2486 self._retreat(index) 2487 return None 2488 2489 if unpivot: 2490 expressions = self._parse_csv(self._parse_column) 2491 else: 2492 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2493 2494 if not expressions: 2495 self.raise_error("Failed to parse PIVOT's aggregation list") 2496 2497 if not self._match(TokenType.FOR): 2498 self.raise_error("Expecting FOR") 2499 2500 value = self._parse_column() 2501 2502 if not self._match(TokenType.IN): 2503 self.raise_error("Expecting IN") 2504 2505 field = self._parse_in(value, alias=True) 2506 2507 self._match_r_paren() 2508 2509 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2510 2511 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2512 pivot.set("alias", self._parse_table_alias()) 2513 2514 if not unpivot: 2515 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2516 2517 columns: t.List[exp.Expression] = [] 2518 for fld in pivot.args["field"].expressions: 2519 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2520 for name in names: 2521 if self.PREFIXED_PIVOT_COLUMNS: 2522 name = f"{name}_{field_name}" if name else field_name 2523 else: 2524 name = f"{field_name}_{name}" if name else field_name 2525 2526 columns.append(exp.to_identifier(name)) 2527 2528 pivot.set("columns", columns) 2529 2530 return pivot 2531 2532 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2533 return [agg.alias for agg in aggregations] 2534 2535 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2536 if not skip_where_token and not self._match(TokenType.WHERE): 2537 return None 2538 2539 return self.expression( 2540 exp.Where, 
comments=self._prev_comments, this=self._parse_conjunction() 2541 ) 2542 2543 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2544 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2545 return None 2546 2547 elements = defaultdict(list) 2548 2549 while True: 2550 expressions = self._parse_csv(self._parse_conjunction) 2551 if expressions: 2552 elements["expressions"].extend(expressions) 2553 2554 grouping_sets = self._parse_grouping_sets() 2555 if grouping_sets: 2556 elements["grouping_sets"].extend(grouping_sets) 2557 2558 rollup = None 2559 cube = None 2560 totals = None 2561 2562 with_ = self._match(TokenType.WITH) 2563 if self._match(TokenType.ROLLUP): 2564 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2565 elements["rollup"].extend(ensure_list(rollup)) 2566 2567 if self._match(TokenType.CUBE): 2568 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2569 elements["cube"].extend(ensure_list(cube)) 2570 2571 if self._match_text_seq("TOTALS"): 2572 totals = True 2573 elements["totals"] = True # type: ignore 2574 2575 if not (grouping_sets or rollup or cube or totals): 2576 break 2577 2578 return self.expression(exp.Group, **elements) # type: ignore 2579 2580 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2581 if not self._match(TokenType.GROUPING_SETS): 2582 return None 2583 2584 return self._parse_wrapped_csv(self._parse_grouping_set) 2585 2586 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2587 if self._match(TokenType.L_PAREN): 2588 grouping_set = self._parse_csv(self._parse_column) 2589 self._match_r_paren() 2590 return self.expression(exp.Tuple, expressions=grouping_set) 2591 2592 return self._parse_column() 2593 2594 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2595 if not skip_having_token and not self._match(TokenType.HAVING): 2596 return None 2597 return self.expression(exp.Having, 
this=self._parse_conjunction()) 2598 2599 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2600 if not self._match(TokenType.QUALIFY): 2601 return None 2602 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2603 2604 def _parse_order( 2605 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2606 ) -> t.Optional[exp.Expression]: 2607 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2608 return this 2609 2610 return self.expression( 2611 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2612 ) 2613 2614 def _parse_sort( 2615 self, exp_class: t.Type[exp.Expression], *texts: str 2616 ) -> t.Optional[exp.Expression]: 2617 if not self._match_text_seq(*texts): 2618 return None 2619 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2620 2621 def _parse_ordered(self) -> exp.Expression: 2622 this = self._parse_conjunction() 2623 self._match(TokenType.ASC) 2624 is_desc = self._match(TokenType.DESC) 2625 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2626 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2627 desc = is_desc or False 2628 asc = not desc 2629 nulls_first = is_nulls_first or False 2630 explicitly_null_ordered = is_nulls_first or is_nulls_last 2631 if ( 2632 not explicitly_null_ordered 2633 and ( 2634 (asc and self.null_ordering == "nulls_are_small") 2635 or (desc and self.null_ordering != "nulls_are_small") 2636 ) 2637 and self.null_ordering != "nulls_are_last" 2638 ): 2639 nulls_first = True 2640 2641 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2642 2643 def _parse_limit( 2644 self, this: t.Optional[exp.Expression] = None, top: bool = False 2645 ) -> t.Optional[exp.Expression]: 2646 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2647 limit_paren = self._match(TokenType.L_PAREN) 2648 limit_exp = self.expression( 2649 exp.Limit, this=this, expression=self._parse_number() if top 
else self._parse_term() 2650 ) 2651 2652 if limit_paren: 2653 self._match_r_paren() 2654 2655 return limit_exp 2656 2657 if self._match(TokenType.FETCH): 2658 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2659 direction = self._prev.text if direction else "FIRST" 2660 2661 count = self._parse_number() 2662 percent = self._match(TokenType.PERCENT) 2663 2664 self._match_set((TokenType.ROW, TokenType.ROWS)) 2665 2666 only = self._match_text_seq("ONLY") 2667 with_ties = self._match_text_seq("WITH", "TIES") 2668 2669 if only and with_ties: 2670 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2671 2672 return self.expression( 2673 exp.Fetch, 2674 direction=direction, 2675 count=count, 2676 percent=percent, 2677 with_ties=with_ties, 2678 ) 2679 2680 return this 2681 2682 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2683 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2684 return this 2685 2686 count = self._parse_number() 2687 self._match_set((TokenType.ROW, TokenType.ROWS)) 2688 return self.expression(exp.Offset, this=this, expression=count) 2689 2690 def _parse_locks(self) -> t.List[exp.Expression]: 2691 # Lists are invariant, so we need to use a type hint here 2692 locks: t.List[exp.Expression] = [] 2693 2694 while True: 2695 if self._match_text_seq("FOR", "UPDATE"): 2696 update = True 2697 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2698 "LOCK", "IN", "SHARE", "MODE" 2699 ): 2700 update = False 2701 else: 2702 break 2703 2704 expressions = None 2705 if self._match_text_seq("OF"): 2706 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2707 2708 wait: t.Optional[bool | exp.Expression] = None 2709 if self._match_text_seq("NOWAIT"): 2710 wait = True 2711 elif self._match_text_seq("WAIT"): 2712 wait = self._parse_primary() 2713 elif self._match_text_seq("SKIP", "LOCKED"): 2714 wait = False 2715 2716 locks.append( 2717 
self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2718 ) 2719 2720 return locks 2721 2722 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2723 if not self._match_set(self.SET_OPERATIONS): 2724 return this 2725 2726 token_type = self._prev.token_type 2727 2728 if token_type == TokenType.UNION: 2729 expression = exp.Union 2730 elif token_type == TokenType.EXCEPT: 2731 expression = exp.Except 2732 else: 2733 expression = exp.Intersect 2734 2735 return self.expression( 2736 expression, 2737 this=this, 2738 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2739 expression=self._parse_set_operations(self._parse_select(nested=True)), 2740 ) 2741 2742 def _parse_expression(self) -> t.Optional[exp.Expression]: 2743 return self._parse_alias(self._parse_conjunction()) 2744 2745 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2746 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2747 2748 def _parse_equality(self) -> t.Optional[exp.Expression]: 2749 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2750 2751 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2752 return self._parse_tokens(self._parse_range, self.COMPARISON) 2753 2754 def _parse_range(self) -> t.Optional[exp.Expression]: 2755 this = self._parse_bitwise() 2756 negate = self._match(TokenType.NOT) 2757 2758 if self._match_set(self.RANGE_PARSERS): 2759 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2760 if not expression: 2761 return this 2762 2763 this = expression 2764 elif self._match(TokenType.ISNULL): 2765 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2766 2767 # Postgres supports ISNULL and NOTNULL for conditions. 
2768 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2769 if self._match(TokenType.NOTNULL): 2770 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2771 this = self.expression(exp.Not, this=this) 2772 2773 if negate: 2774 this = self.expression(exp.Not, this=this) 2775 2776 if self._match(TokenType.IS): 2777 this = self._parse_is(this) 2778 2779 return this 2780 2781 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2782 index = self._index - 1 2783 negate = self._match(TokenType.NOT) 2784 if self._match_text_seq("DISTINCT", "FROM"): 2785 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2786 return self.expression(klass, this=this, expression=self._parse_expression()) 2787 2788 expression = self._parse_null() or self._parse_boolean() 2789 if not expression: 2790 self._retreat(index) 2791 return None 2792 2793 this = self.expression(exp.Is, this=this, expression=expression) 2794 return self.expression(exp.Not, this=this) if negate else this 2795 2796 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2797 unnest = self._parse_unnest() 2798 if unnest: 2799 this = self.expression(exp.In, this=this, unnest=unnest) 2800 elif self._match(TokenType.L_PAREN): 2801 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2802 2803 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2804 this = self.expression(exp.In, this=this, query=expressions[0]) 2805 else: 2806 this = self.expression(exp.In, this=this, expressions=expressions) 2807 2808 self._match_r_paren(this) 2809 else: 2810 this = self.expression(exp.In, this=this, field=self._parse_field()) 2811 2812 return this 2813 2814 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2815 low = self._parse_bitwise() 2816 self._match(TokenType.AND) 2817 high = self._parse_bitwise() 2818 return self.expression(exp.Between, this=this, low=low, high=high) 2819 
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, canonicalizing to `INTERVAL '<n>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, including << and >> token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a typed expression: interval, cast-style `TYPE literal`, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat it as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, backtracking on failure."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call; resolved further below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            # Each extra [] wraps the type in another ARRAY level.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # A following string literal means this was actually a function call.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Apply chained column operators (::, ., brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

if op_token == TokenType.DCOLON: 3061 field = self._parse_types() 3062 if not field: 3063 self.raise_error("Expected type") 3064 elif op and self._curr: 3065 self._advance() 3066 value = self._prev.text 3067 field = ( 3068 exp.Literal.number(value) 3069 if self._prev.token_type == TokenType.NUMBER 3070 else exp.Literal.string(value) 3071 ) 3072 else: 3073 field = self._parse_field(anonymous_func=True) 3074 3075 if isinstance(field, exp.Func): 3076 # bigquery allows function calls like x.y.count(...) 3077 # SAFE.SUBSTR(...) 3078 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3079 this = self._replace_columns_with_dots(this) 3080 3081 if op: 3082 this = op(self, this, field) 3083 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3084 this = self.expression( 3085 exp.Column, 3086 this=field, 3087 table=this.this, 3088 db=this.args.get("table"), 3089 catalog=this.args.get("db"), 3090 ) 3091 else: 3092 this = self.expression(exp.Dot, this=this, expression=field) 3093 this = self._parse_bracket(this) 3094 return this 3095 3096 def _parse_primary(self) -> t.Optional[exp.Expression]: 3097 if self._match_set(self.PRIMARY_PARSERS): 3098 token_type = self._prev.token_type 3099 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3100 3101 if token_type == TokenType.STRING: 3102 expressions = [primary] 3103 while self._match(TokenType.STRING): 3104 expressions.append(exp.Literal.string(self._prev.text)) 3105 if len(expressions) > 1: 3106 return self.expression(exp.Concat, expressions=expressions) 3107 return primary 3108 3109 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3110 return exp.Literal.number(f"0.{self._prev.text}") 3111 3112 if self._match(TokenType.L_PAREN): 3113 comments = self._prev_comments 3114 query = self._parse_select() 3115 3116 if query: 3117 expressions = [query] 3118 else: 3119 expressions = self._parse_csv(self._parse_expression) 3120 3121 this = 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first try a primary, then a function call, then an identifier/var."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: lookup table of known function builders; defaults to self.FUNCTIONS.
            anonymous: when True, skip dedicated parsers/builders and produce exp.Anonymous.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs with dedicated parsers that take no parenthesized argument list.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(": only paren-less builtins like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) — the argument is a full subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an Anonymous call node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function/UDF definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a character-set introducer (e.g. `_utf8'abc'`); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> ...` / `x -> ...`) or, failing that, a function argument
        expression (possibly DISTINCT, with optional ORDER/LIMIT modifiers)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a plain argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the LHS of `name = value` arguments as a Var rather than a Column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        First speculatively tries a nested SELECT; if that succeeds, `this` is returned
        unchanged and the select is left for the caller to re-parse.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse: drop any errors it produced and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, then zero or more column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier with no type/constraints: not a column def at all.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) with its sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC; None otherwise."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table-level constraint; unnamed ones are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints` (or the full set)."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # the event, e.g. DELETE/UPDATE

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause (target, optional column list, constraint options).

        Args:
            match: when True, require and consume the REFERENCES keyword first.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint (no paren list) or a table key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse a bracketed suffix: `[...]` (index/array) or `{...}` (duckdb struct)."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice: `[:expr]`.
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Dialect-dependent array base index: normalize via the configured offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a Slice if a ':' follows (e.g. `a[1:2]`)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END into an exp.Case."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: function-style IF(...) and IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(<part> FROM <expr>) — also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast over exp.TryCast.

        Also accepts the comma form `CAST(expr, 'type string')` (-> exp.CastToStrType).
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET, which replaces the type.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset | expr, type) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; an odd trailing arg becomes the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one `[KEY] key [:|VALUE] value` pair inside JSON_OBJECT(...)."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL handling, uniqueness, RETURNING and encoding options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...) honoring dialect argument order and one-arg LOG/LN defaults."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH(cols) AGAINST(str [modifier]) into exp.MatchAgainst."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One entry of the WITH (...) clause: name, type, optional path, AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE into exp.StrPosition, handling both argument orders."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN str) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING, including the Postgres FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] str [COLLATE ...]) into exp.Trim."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: what we parsed first is the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )
    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window suffix of a function call: FILTER, WITHIN GROUP, NULLS handling,
        and OVER (...) — or a named window definition when `alias` is True."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — reference to a named window, no inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias after `this`; with `explicit`, only if the AS keyword is present."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, c).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
self._match(TokenType.L_PAREN): 4026 aliases = self.expression( 4027 exp.Aliases, 4028 this=this, 4029 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4030 ) 4031 self._match_r_paren(aliases) 4032 return aliases 4033 4034 alias = self._parse_id_var(any_token) 4035 4036 if alias: 4037 return self.expression(exp.Alias, this=this, alias=alias) 4038 4039 return this 4040 4041 def _parse_id_var( 4042 self, 4043 any_token: bool = True, 4044 tokens: t.Optional[t.Collection[TokenType]] = None, 4045 ) -> t.Optional[exp.Expression]: 4046 identifier = self._parse_identifier() 4047 4048 if identifier: 4049 return identifier 4050 4051 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4052 quoted = self._prev.token_type == TokenType.STRING 4053 return exp.Identifier(this=self._prev.text, quoted=quoted) 4054 4055 return None 4056 4057 def _parse_string(self) -> t.Optional[exp.Expression]: 4058 if self._match(TokenType.STRING): 4059 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4060 return self._parse_placeholder() 4061 4062 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 4063 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4064 4065 def _parse_number(self) -> t.Optional[exp.Expression]: 4066 if self._match(TokenType.NUMBER): 4067 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4068 return self._parse_placeholder() 4069 4070 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4071 if self._match(TokenType.IDENTIFIER): 4072 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4073 return self._parse_placeholder() 4074 4075 def _parse_var( 4076 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4077 ) -> t.Optional[exp.Expression]: 4078 if ( 4079 (any_token and self._advance_any()) 4080 or self._match(TokenType.VAR) 4081 or (self._match_set(tokens) if tokens else False) 
4082 ): 4083 return self.expression(exp.Var, this=self._prev.text) 4084 return self._parse_placeholder() 4085 4086 def _advance_any(self) -> t.Optional[Token]: 4087 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4088 self._advance() 4089 return self._prev 4090 return None 4091 4092 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4093 return self._parse_var() or self._parse_string() 4094 4095 def _parse_null(self) -> t.Optional[exp.Expression]: 4096 if self._match(TokenType.NULL): 4097 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4098 return None 4099 4100 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4101 if self._match(TokenType.TRUE): 4102 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4103 if self._match(TokenType.FALSE): 4104 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4105 return None 4106 4107 def _parse_star(self) -> t.Optional[exp.Expression]: 4108 if self._match(TokenType.STAR): 4109 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4110 return None 4111 4112 def _parse_parameter(self) -> exp.Expression: 4113 wrapped = self._match(TokenType.L_BRACE) 4114 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4115 self._match(TokenType.R_BRACE) 4116 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4117 4118 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4119 if self._match_set(self.PLACEHOLDER_PARSERS): 4120 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4121 if placeholder: 4122 return placeholder 4123 self._advance(-1) 4124 return None 4125 4126 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4127 if not self._match(TokenType.EXCEPT): 4128 return None 4129 if self._match(TokenType.L_PAREN, advance=False): 4130 return self._parse_wrapped_csv(self._parse_column) 4131 return self._parse_csv(self._parse_column) 4132 4133 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4134 if not self._match(TokenType.REPLACE): 4135 return None 4136 if self._match(TokenType.L_PAREN, advance=False): 4137 return self._parse_wrapped_csv(self._parse_expression) 4138 return self._parse_csv(self._parse_expression) 4139 4140 def _parse_csv( 4141 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4142 ) -> t.List[t.Optional[exp.Expression]]: 4143 parse_result = parse_method() 4144 items = [parse_result] if parse_result is not None else [] 4145 4146 while self._match(sep): 4147 self._add_comments(parse_result) 4148 parse_result = parse_method() 4149 if parse_result is not None: 4150 items.append(parse_result) 4151 4152 return items 4153 4154 def _parse_tokens( 4155 self, parse_method: t.Callable, expressions: t.Dict 4156 ) -> t.Optional[exp.Expression]: 4157 this = parse_method() 4158 4159 while self._match_set(expressions): 4160 this = self.expression( 4161 expressions[self._prev.token_type], 4162 this=this, 4163 comments=self._prev_comments, 4164 expression=parse_method(), 4165 ) 4166 4167 return this 4168 4169 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4170 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4171 4172 def _parse_wrapped_csv( 4173 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4174 ) -> t.List[t.Optional[exp.Expression]]: 4175 return self._parse_wrapped( 4176 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4177 ) 4178 4179 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4180 wrapped = self._match(TokenType.L_PAREN) 4181 if not wrapped and not optional: 4182 self.raise_error("Expecting (") 4183 parse_result = parse_method() 4184 if wrapped: 4185 self._match_r_paren() 4186 return parse_result 4187 4188 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4189 return 
self._parse_select() or self._parse_set_operations( 4190 self._parse_expression() if alias else self._parse_conjunction() 4191 ) 4192 4193 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4194 return self._parse_query_modifiers( 4195 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4196 ) 4197 4198 def _parse_transaction(self) -> exp.Expression: 4199 this = None 4200 if self._match_texts(self.TRANSACTION_KIND): 4201 this = self._prev.text 4202 4203 self._match_texts({"TRANSACTION", "WORK"}) 4204 4205 modes = [] 4206 while True: 4207 mode = [] 4208 while self._match(TokenType.VAR): 4209 mode.append(self._prev.text) 4210 4211 if mode: 4212 modes.append(" ".join(mode)) 4213 if not self._match(TokenType.COMMA): 4214 break 4215 4216 return self.expression(exp.Transaction, this=this, modes=modes) 4217 4218 def _parse_commit_or_rollback(self) -> exp.Expression: 4219 chain = None 4220 savepoint = None 4221 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4222 4223 self._match_texts({"TRANSACTION", "WORK"}) 4224 4225 if self._match_text_seq("TO"): 4226 self._match_text_seq("SAVEPOINT") 4227 savepoint = self._parse_id_var() 4228 4229 if self._match(TokenType.AND): 4230 chain = not self._match_text_seq("NO") 4231 self._match_text_seq("CHAIN") 4232 4233 if is_rollback: 4234 return self.expression(exp.Rollback, savepoint=savepoint) 4235 return self.expression(exp.Commit, chain=chain) 4236 4237 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4238 if not self._match_text_seq("ADD"): 4239 return None 4240 4241 self._match(TokenType.COLUMN) 4242 exists_column = self._parse_exists(not_=True) 4243 expression = self._parse_column_def(self._parse_field(any_token=True)) 4244 4245 if expression: 4246 expression.set("exists", exists_column) 4247 4248 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4249 if self._match_texts(("FIRST", "AFTER")): 4250 position = self._prev.text 
4251 column_position = self.expression( 4252 exp.ColumnPosition, this=self._parse_column(), position=position 4253 ) 4254 expression.set("position", column_position) 4255 4256 return expression 4257 4258 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4259 drop = self._match(TokenType.DROP) and self._parse_drop() 4260 if drop and not isinstance(drop, exp.Command): 4261 drop.set("kind", drop.args.get("kind", "COLUMN")) 4262 return drop 4263 4264 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4265 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4266 return self.expression( 4267 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4268 ) 4269 4270 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4271 this = None 4272 kind = self._prev.token_type 4273 4274 if kind == TokenType.CONSTRAINT: 4275 this = self._parse_id_var() 4276 4277 if self._match_text_seq("CHECK"): 4278 expression = self._parse_wrapped(self._parse_conjunction) 4279 enforced = self._match_text_seq("ENFORCED") 4280 4281 return self.expression( 4282 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4283 ) 4284 4285 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4286 expression = self._parse_foreign_key() 4287 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4288 expression = self._parse_primary_key() 4289 else: 4290 expression = None 4291 4292 return self.expression(exp.AddConstraint, this=this, expression=expression) 4293 4294 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4295 index = self._index - 1 4296 4297 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4298 return self._parse_csv(self._parse_add_constraint) 4299 4300 self._retreat(index) 4301 return self._parse_csv(self._parse_add_column) 4302 4303 def _parse_alter_table_alter(self) -> exp.Expression: 4304 
self._match(TokenType.COLUMN) 4305 column = self._parse_field(any_token=True) 4306 4307 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4308 return self.expression(exp.AlterColumn, this=column, drop=True) 4309 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4310 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4311 4312 self._match_text_seq("SET", "DATA") 4313 return self.expression( 4314 exp.AlterColumn, 4315 this=column, 4316 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4317 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4318 using=self._match(TokenType.USING) and self._parse_conjunction(), 4319 ) 4320 4321 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4322 index = self._index - 1 4323 4324 partition_exists = self._parse_exists() 4325 if self._match(TokenType.PARTITION, advance=False): 4326 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4327 4328 self._retreat(index) 4329 return self._parse_csv(self._parse_drop_column) 4330 4331 def _parse_alter_table_rename(self) -> exp.Expression: 4332 self._match_text_seq("TO") 4333 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4334 4335 def _parse_alter(self) -> t.Optional[exp.Expression]: 4336 start = self._prev 4337 4338 if not self._match(TokenType.TABLE): 4339 return self._parse_as_command(start) 4340 4341 exists = self._parse_exists() 4342 this = self._parse_table(schema=True) 4343 4344 if self._next: 4345 self._advance() 4346 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4347 4348 if parser: 4349 actions = ensure_list(parser(self)) 4350 4351 if not self._curr: 4352 return self.expression( 4353 exp.AlterTable, 4354 this=this, 4355 exists=exists, 4356 actions=actions, 4357 ) 4358 return self._parse_as_command(start) 4359 4360 def _parse_merge(self) -> exp.Expression: 4361 self._match(TokenType.INTO) 4362 target 
= self._parse_table() 4363 4364 self._match(TokenType.USING) 4365 using = self._parse_table() 4366 4367 self._match(TokenType.ON) 4368 on = self._parse_conjunction() 4369 4370 whens = [] 4371 while self._match(TokenType.WHEN): 4372 matched = not self._match(TokenType.NOT) 4373 self._match_text_seq("MATCHED") 4374 source = ( 4375 False 4376 if self._match_text_seq("BY", "TARGET") 4377 else self._match_text_seq("BY", "SOURCE") 4378 ) 4379 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4380 4381 self._match(TokenType.THEN) 4382 4383 if self._match(TokenType.INSERT): 4384 _this = self._parse_star() 4385 if _this: 4386 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4387 else: 4388 then = self.expression( 4389 exp.Insert, 4390 this=self._parse_value(), 4391 expression=self._match(TokenType.VALUES) and self._parse_value(), 4392 ) 4393 elif self._match(TokenType.UPDATE): 4394 expressions = self._parse_star() 4395 if expressions: 4396 then = self.expression(exp.Update, expressions=expressions) 4397 else: 4398 then = self.expression( 4399 exp.Update, 4400 expressions=self._match(TokenType.SET) 4401 and self._parse_csv(self._parse_equality), 4402 ) 4403 elif self._match(TokenType.DELETE): 4404 then = self.expression(exp.Var, this=self._prev.text) 4405 else: 4406 then = None 4407 4408 whens.append( 4409 self.expression( 4410 exp.When, 4411 matched=matched, 4412 source=source, 4413 condition=condition, 4414 then=then, 4415 ) 4416 ) 4417 4418 return self.expression( 4419 exp.Merge, 4420 this=target, 4421 using=using, 4422 on=on, 4423 expressions=whens, 4424 ) 4425 4426 def _parse_show(self) -> t.Optional[exp.Expression]: 4427 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4428 if parser: 4429 return parser(self) 4430 self._advance() 4431 return self.expression(exp.Show, this=self._prev.text.upper()) 4432 4433 def _parse_set_item_assignment( 4434 self, kind: t.Optional[str] = None 4435 ) -> 
t.Optional[exp.Expression]: 4436 index = self._index 4437 4438 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4439 return self._parse_set_transaction(global_=kind == "GLOBAL") 4440 4441 left = self._parse_primary() or self._parse_id_var() 4442 4443 if not self._match_texts(("=", "TO")): 4444 self._retreat(index) 4445 return None 4446 4447 right = self._parse_statement() or self._parse_id_var() 4448 this = self.expression( 4449 exp.EQ, 4450 this=left, 4451 expression=right, 4452 ) 4453 4454 return self.expression( 4455 exp.SetItem, 4456 this=this, 4457 kind=kind, 4458 ) 4459 4460 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4461 self._match_text_seq("TRANSACTION") 4462 characteristics = self._parse_csv( 4463 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4464 ) 4465 return self.expression( 4466 exp.SetItem, 4467 expressions=characteristics, 4468 kind="TRANSACTION", 4469 **{"global": global_}, # type: ignore 4470 ) 4471 4472 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4473 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4474 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4475 4476 def _parse_set(self) -> exp.Expression: 4477 index = self._index 4478 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4479 4480 if self._curr: 4481 self._retreat(index) 4482 return self._parse_as_command(self._prev) 4483 4484 return set_ 4485 4486 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4487 for option in options: 4488 if self._match_text_seq(*option.split(" ")): 4489 return exp.Var(this=option) 4490 return None 4491 4492 def _parse_as_command(self, start: Token) -> exp.Command: 4493 while self._curr: 4494 self._advance() 4495 text = self._find_sql(start, self._prev) 4496 size = len(start.text) 4497 return exp.Command(this=text[:size], 
expression=text[size:]) 4498 4499 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4500 settings = [] 4501 4502 self._match_l_paren() 4503 kind = self._parse_id_var() 4504 4505 if self._match(TokenType.L_PAREN): 4506 while True: 4507 key = self._parse_id_var() 4508 value = self._parse_primary() 4509 4510 if not key and value is None: 4511 break 4512 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4513 self._match(TokenType.R_PAREN) 4514 4515 self._match_r_paren() 4516 4517 return self.expression( 4518 exp.DictProperty, 4519 this=this, 4520 kind=kind.this if kind else None, 4521 settings=settings, 4522 ) 4523 4524 def _parse_dict_range(self, this: str) -> exp.DictRange: 4525 self._match_l_paren() 4526 has_min = self._match_text_seq("MIN") 4527 if has_min: 4528 min = self._parse_var() or self._parse_primary() 4529 self._match_text_seq("MAX") 4530 max = self._parse_var() or self._parse_primary() 4531 else: 4532 max = self._parse_var() or self._parse_primary() 4533 min = exp.Literal.number(0) 4534 self._match_r_paren() 4535 return self.expression(exp.DictRange, this=this, min=min, max=max) 4536 4537 def _find_parser( 4538 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4539 ) -> t.Optional[t.Callable]: 4540 if not self._curr: 4541 return None 4542 4543 index = self._index 4544 this = [] 4545 while True: 4546 # The current token might be multiple words 4547 curr = self._curr.text.upper() 4548 key = curr.split(" ") 4549 this.append(curr) 4550 self._advance() 4551 result, trie = in_trie(trie, key) 4552 if result == 0: 4553 break 4554 if result == 2: 4555 subparser = parsers[" ".join(this)] 4556 return subparser 4557 self._retreat(index) 4558 return None 4559 4560 def _match(self, token_type, advance=True, expression=None): 4561 if not self._curr: 4562 return None 4563 4564 if self._curr.token_type == token_type: 4565 if advance: 4566 self._advance() 4567 self._add_comments(expression) 4568 return True 4569 4570 return None 
    def _match_set(self, types, advance=True):
        """Consume the current token if its type is in `types`.

        Returns True on a match (advancing past the token unless
        advance=False), otherwise None. Also returns None at end of input.
        """
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Consume the next two tokens if they match the given types, in order.

        Returns True (advancing two tokens unless advance=False) on a match,
        otherwise None. Requires at least two tokens of lookahead.
        """
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening parenthesis, raising a parse error if absent.

        Any pending comments are attached to `expression` via _match.
        """
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing parenthesis, raising a parse error if absent.

        Any pending comments are attached to `expression` via _match.
        """
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Consume the current token if its upper-cased text is in `texts`.

        Unlike _match/_match_set, this returns False (not None) on a miss.
        """
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Consume a run of tokens whose upper-cased texts equal `texts` in order.

        On the first mismatch the parser is rewound to its starting index and
        False is returned. With advance=False the position is restored even on
        success, making this a pure lookahead check.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    # Overloads: a non-None input yields a non-None result; the implementation
    # lives immediately below these stubs.
    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var nodes.

        A qualified Column becomes Dot(table, column-this); an unqualified
        Column or a bare Identifier becomes a Var carrying the same name.
        Dot nodes are recursed into so nested children get the same rewrite.
        Children are mutated in place via exp.replace_children; the (possibly
        new) node is returned.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Strip column wrappers from references to lambda parameters.

        For every Column in `node` whose first part names a lambda variable,
        the column is replaced by a Dot (if table-qualified) or by its inner
        identifier. When the column sits under a chain of Dot parents, the
        outermost Dot of that chain is replaced instead; the for/else branch
        handles the no-Dot-parent case, including `node` itself being the
        column. Returns the (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Topmost Dot in the chain: swap the whole chain out.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column itself, taking care
                    # to rebind the root when the column *is* the root.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from a flat [k1, v1, k2, v2, ...] argument list.

    A single star argument (e.g. `MAP(*)`) yields a StarMap instead. The
    even-indexed arguments become the keys array and the odd-indexed ones
    the values array; an odd-length list raises IndexError on the unpaired
    trailing key.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    for index in range(0, len(args), 2):
        keys.append(args[index])
        values.append(args[index + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
57class Parser(metaclass=_Parser): 58 """ 59 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 60 a parsed syntax tree. 61 62 Args: 63 error_level: the desired error level. 64 Default: ErrorLevel.IMMEDIATE 65 error_message_context: determines the amount of context to capture from a 66 query string when displaying the error message (in number of characters). 67 Default: 50. 68 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 69 Default: 0 70 alias_post_tablesample: If the table alias comes after tablesample. 71 Default: False 72 max_errors: Maximum number of error messages to include in a raised ParseError. 73 This is only relevant if error_level is ErrorLevel.RAISE. 74 Default: 3 75 null_ordering: Indicates the default null ordering method to use if not explicitly set. 76 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 77 Default: "nulls_are_small" 78 """ 79 80 FUNCTIONS: t.Dict[str, t.Callable] = { 81 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 82 "DATE_TO_DATE_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 87 "IFNULL": exp.Coalesce.from_arg_list, 88 "LIKE": parse_like, 89 "TIME_TO_TIME_STR": lambda args: exp.Cast( 90 this=seq_get(args, 0), 91 to=exp.DataType(this=exp.DataType.Type.TEXT), 92 ), 93 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 94 this=exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 start=exp.Literal.number(1), 99 length=exp.Literal.number(10), 100 ), 101 "VAR_MAP": parse_var_map, 102 } 103 104 NO_PAREN_FUNCTIONS = { 105 TokenType.CURRENT_DATE: exp.CurrentDate, 106 TokenType.CURRENT_DATETIME: exp.CurrentDate, 107 TokenType.CURRENT_TIME: exp.CurrentTime, 108 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 109 
TokenType.CURRENT_USER: exp.CurrentUser, 110 } 111 112 JOIN_HINTS: t.Set[str] = set() 113 114 NESTED_TYPE_TOKENS = { 115 TokenType.ARRAY, 116 TokenType.MAP, 117 TokenType.NULLABLE, 118 TokenType.STRUCT, 119 } 120 121 TYPE_TOKENS = { 122 TokenType.BIT, 123 TokenType.BOOLEAN, 124 TokenType.TINYINT, 125 TokenType.UTINYINT, 126 TokenType.SMALLINT, 127 TokenType.USMALLINT, 128 TokenType.INT, 129 TokenType.UINT, 130 TokenType.BIGINT, 131 TokenType.UBIGINT, 132 TokenType.INT128, 133 TokenType.UINT128, 134 TokenType.INT256, 135 TokenType.UINT256, 136 TokenType.FLOAT, 137 TokenType.DOUBLE, 138 TokenType.CHAR, 139 TokenType.NCHAR, 140 TokenType.VARCHAR, 141 TokenType.NVARCHAR, 142 TokenType.TEXT, 143 TokenType.MEDIUMTEXT, 144 TokenType.LONGTEXT, 145 TokenType.MEDIUMBLOB, 146 TokenType.LONGBLOB, 147 TokenType.BINARY, 148 TokenType.VARBINARY, 149 TokenType.JSON, 150 TokenType.JSONB, 151 TokenType.INTERVAL, 152 TokenType.TIME, 153 TokenType.TIMESTAMP, 154 TokenType.TIMESTAMPTZ, 155 TokenType.TIMESTAMPLTZ, 156 TokenType.DATETIME, 157 TokenType.DATETIME64, 158 TokenType.DATE, 159 TokenType.INT4RANGE, 160 TokenType.INT4MULTIRANGE, 161 TokenType.INT8RANGE, 162 TokenType.INT8MULTIRANGE, 163 TokenType.NUMRANGE, 164 TokenType.NUMMULTIRANGE, 165 TokenType.TSRANGE, 166 TokenType.TSMULTIRANGE, 167 TokenType.TSTZRANGE, 168 TokenType.TSTZMULTIRANGE, 169 TokenType.DATERANGE, 170 TokenType.DATEMULTIRANGE, 171 TokenType.DECIMAL, 172 TokenType.BIGDECIMAL, 173 TokenType.UUID, 174 TokenType.GEOGRAPHY, 175 TokenType.GEOMETRY, 176 TokenType.HLLSKETCH, 177 TokenType.HSTORE, 178 TokenType.PSEUDO_TYPE, 179 TokenType.SUPER, 180 TokenType.SERIAL, 181 TokenType.SMALLSERIAL, 182 TokenType.BIGSERIAL, 183 TokenType.XML, 184 TokenType.UNIQUEIDENTIFIER, 185 TokenType.MONEY, 186 TokenType.SMALLMONEY, 187 TokenType.ROWVERSION, 188 TokenType.IMAGE, 189 TokenType.VARIANT, 190 TokenType.OBJECT, 191 TokenType.INET, 192 *NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 
TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 ID_VAR_TOKENS = { 221 TokenType.VAR, 222 TokenType.ANTI, 223 TokenType.APPLY, 224 TokenType.ASC, 225 TokenType.AUTO_INCREMENT, 226 TokenType.BEGIN, 227 TokenType.CACHE, 228 TokenType.COLLATE, 229 TokenType.COMMAND, 230 TokenType.COMMENT, 231 TokenType.COMMIT, 232 TokenType.CONSTRAINT, 233 TokenType.DEFAULT, 234 TokenType.DELETE, 235 TokenType.DESC, 236 TokenType.DESCRIBE, 237 TokenType.DICTIONARY, 238 TokenType.DIV, 239 TokenType.END, 240 TokenType.EXECUTE, 241 TokenType.ESCAPE, 242 TokenType.FALSE, 243 TokenType.FIRST, 244 TokenType.FILTER, 245 TokenType.FORMAT, 246 TokenType.FULL, 247 TokenType.IF, 248 TokenType.IS, 249 TokenType.ISNULL, 250 TokenType.INTERVAL, 251 TokenType.KEEP, 252 TokenType.LEFT, 253 TokenType.LOAD, 254 TokenType.MERGE, 255 TokenType.NATURAL, 256 TokenType.NEXT, 257 TokenType.OFFSET, 258 TokenType.ORDINALITY, 259 TokenType.OVERWRITE, 260 TokenType.PARTITION, 261 TokenType.PERCENT, 262 TokenType.PIVOT, 263 TokenType.PRAGMA, 264 TokenType.RANGE, 265 TokenType.REFERENCES, 266 TokenType.RIGHT, 267 TokenType.ROW, 268 TokenType.ROWS, 269 TokenType.SEMI, 270 TokenType.SET, 271 TokenType.SETTINGS, 272 TokenType.SHOW, 273 TokenType.TEMPORARY, 274 TokenType.TOP, 275 TokenType.TRUE, 276 TokenType.UNIQUE, 277 TokenType.UNPIVOT, 278 TokenType.VOLATILE, 279 TokenType.WINDOW, 280 *CREATABLES, 281 *SUBQUERY_PREDICATES, 282 *TYPE_TOKENS, 283 *NO_PAREN_FUNCTIONS, 284 } 285 286 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 287 288 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 289 TokenType.APPLY, 290 
TokenType.ASOF, 291 TokenType.FULL, 292 TokenType.LEFT, 293 TokenType.LOCK, 294 TokenType.NATURAL, 295 TokenType.OFFSET, 296 TokenType.RIGHT, 297 TokenType.WINDOW, 298 } 299 300 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 301 302 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 303 304 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 305 306 FUNC_TOKENS = { 307 TokenType.COMMAND, 308 TokenType.CURRENT_DATE, 309 TokenType.CURRENT_DATETIME, 310 TokenType.CURRENT_TIMESTAMP, 311 TokenType.CURRENT_TIME, 312 TokenType.CURRENT_USER, 313 TokenType.FILTER, 314 TokenType.FIRST, 315 TokenType.FORMAT, 316 TokenType.GLOB, 317 TokenType.IDENTIFIER, 318 TokenType.INDEX, 319 TokenType.ISNULL, 320 TokenType.ILIKE, 321 TokenType.LIKE, 322 TokenType.MERGE, 323 TokenType.OFFSET, 324 TokenType.PRIMARY_KEY, 325 TokenType.RANGE, 326 TokenType.REPLACE, 327 TokenType.ROW, 328 TokenType.UNNEST, 329 TokenType.VAR, 330 TokenType.LEFT, 331 TokenType.RIGHT, 332 TokenType.DATE, 333 TokenType.DATETIME, 334 TokenType.TABLE, 335 TokenType.TIMESTAMP, 336 TokenType.TIMESTAMPTZ, 337 TokenType.WINDOW, 338 *TYPE_TOKENS, 339 *SUBQUERY_PREDICATES, 340 } 341 342 CONJUNCTION = { 343 TokenType.AND: exp.And, 344 TokenType.OR: exp.Or, 345 } 346 347 EQUALITY = { 348 TokenType.EQ: exp.EQ, 349 TokenType.NEQ: exp.NEQ, 350 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 351 } 352 353 COMPARISON = { 354 TokenType.GT: exp.GT, 355 TokenType.GTE: exp.GTE, 356 TokenType.LT: exp.LT, 357 TokenType.LTE: exp.LTE, 358 } 359 360 BITWISE = { 361 TokenType.AMP: exp.BitwiseAnd, 362 TokenType.CARET: exp.BitwiseXor, 363 TokenType.PIPE: exp.BitwiseOr, 364 TokenType.DPIPE: exp.DPipe, 365 } 366 367 TERM = { 368 TokenType.DASH: exp.Sub, 369 TokenType.PLUS: exp.Add, 370 TokenType.MOD: exp.Mod, 371 TokenType.COLLATE: exp.Collate, 372 } 373 374 FACTOR = { 375 TokenType.DIV: exp.IntDiv, 376 TokenType.LR_ARROW: exp.Distance, 377 TokenType.SLASH: exp.Div, 378 TokenType.STAR: exp.Mul, 379 } 380 381 TIMESTAMPS = { 382 
TokenType.TIME, 383 TokenType.TIMESTAMP, 384 TokenType.TIMESTAMPTZ, 385 TokenType.TIMESTAMPLTZ, 386 } 387 388 SET_OPERATIONS = { 389 TokenType.UNION, 390 TokenType.INTERSECT, 391 TokenType.EXCEPT, 392 } 393 394 JOIN_METHODS = { 395 TokenType.NATURAL, 396 TokenType.ASOF, 397 } 398 399 JOIN_SIDES = { 400 TokenType.LEFT, 401 TokenType.RIGHT, 402 TokenType.FULL, 403 } 404 405 JOIN_KINDS = { 406 TokenType.INNER, 407 TokenType.OUTER, 408 TokenType.CROSS, 409 TokenType.SEMI, 410 TokenType.ANTI, 411 } 412 413 LAMBDAS = { 414 TokenType.ARROW: lambda self, expressions: self.expression( 415 exp.Lambda, 416 this=self._replace_lambda( 417 self._parse_conjunction(), 418 {node.name for node in expressions}, 419 ), 420 expressions=expressions, 421 ), 422 TokenType.FARROW: lambda self, expressions: self.expression( 423 exp.Kwarg, 424 this=exp.Var(this=expressions[0].name), 425 expression=self._parse_conjunction(), 426 ), 427 } 428 429 COLUMN_OPERATORS = { 430 TokenType.DOT: None, 431 TokenType.DCOLON: lambda self, this, to: self.expression( 432 exp.Cast if self.STRICT_CAST else exp.TryCast, 433 this=this, 434 to=to, 435 ), 436 TokenType.ARROW: lambda self, this, path: self.expression( 437 exp.JSONExtract, 438 this=this, 439 expression=path, 440 ), 441 TokenType.DARROW: lambda self, this, path: self.expression( 442 exp.JSONExtractScalar, 443 this=this, 444 expression=path, 445 ), 446 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 447 exp.JSONBExtract, 448 this=this, 449 expression=path, 450 ), 451 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtractScalar, 453 this=this, 454 expression=path, 455 ), 456 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 457 exp.JSONBContains, 458 this=this, 459 expression=key, 460 ), 461 } 462 463 EXPRESSION_PARSERS = { 464 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 465 exp.Column: lambda self: self._parse_column(), 466 exp.Condition: lambda self: 
self._parse_conjunction(), 467 exp.DataType: lambda self: self._parse_types(), 468 exp.Expression: lambda self: self._parse_statement(), 469 exp.From: lambda self: self._parse_from(), 470 exp.Group: lambda self: self._parse_group(), 471 exp.Having: lambda self: self._parse_having(), 472 exp.Identifier: lambda self: self._parse_id_var(), 473 exp.Join: lambda self: self._parse_join(), 474 exp.Lambda: lambda self: self._parse_lambda(), 475 exp.Lateral: lambda self: self._parse_lateral(), 476 exp.Limit: lambda self: self._parse_limit(), 477 exp.Offset: lambda self: self._parse_offset(), 478 exp.Order: lambda self: self._parse_order(), 479 exp.Ordered: lambda self: self._parse_ordered(), 480 exp.Properties: lambda self: self._parse_properties(), 481 exp.Qualify: lambda self: self._parse_qualify(), 482 exp.Returning: lambda self: self._parse_returning(), 483 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 484 exp.Table: lambda self: self._parse_table_parts(), 485 exp.TableAlias: lambda self: self._parse_table_alias(), 486 exp.Where: lambda self: self._parse_where(), 487 exp.Window: lambda self: self._parse_named_window(), 488 exp.With: lambda self: self._parse_with(), 489 "JOIN_TYPE": lambda self: self._parse_join_parts(), 490 } 491 492 STATEMENT_PARSERS = { 493 TokenType.ALTER: lambda self: self._parse_alter(), 494 TokenType.BEGIN: lambda self: self._parse_transaction(), 495 TokenType.CACHE: lambda self: self._parse_cache(), 496 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 497 TokenType.COMMENT: lambda self: self._parse_comment(), 498 TokenType.CREATE: lambda self: self._parse_create(), 499 TokenType.DELETE: lambda self: self._parse_delete(), 500 TokenType.DESC: lambda self: self._parse_describe(), 501 TokenType.DESCRIBE: lambda self: self._parse_describe(), 502 TokenType.DROP: lambda self: self._parse_drop(), 503 TokenType.END: lambda self: self._parse_commit_or_rollback(), 504 TokenType.FROM: lambda self: exp.select("*").from_( 505 
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            # kind is only set when one of these keywords follows USE; otherwise it's False
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Parsers for prefix unary operators, keyed by the operator token.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary (literal-like) terms; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for placeholder/parameter syntax (?, @param, :1, :name, ...).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # A colon placeholder only counts if followed by a number or identifier.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/comparison-style operators; each receives the left operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # Parsers for DDL properties, keyed by the (upper-cased) property keyword.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Parsers for column constraints, keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # "ON" only forms a constraint when it's ON UPDATE; otherwise falls through (False).
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Parsers for ALTER TABLE sub-commands.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that can appear in a schema without a preceding column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose arguments may carry aliases.
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions that need bespoke argument parsing instead of the generic CSV path.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for trailing query modifiers; keys match exp arg names.
    QUERY_MODIFIER_PARSERS = {
        # iter(callable, None) keeps calling until the parser returns None.
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Parsers for SET statement scopes/kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these (a trie is built from SHOW_PARSERS
    # keys in the _Parser metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect switches; subclasses override these flags.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clears all parsing state so this instance can be reused for a new token list."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if no parser is registered for a requested expression type.
            ParseError: if parsing fails for all of the given expression types.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and applies parse_method to each chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m / \033[0m underline the offending span in terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicit comments if given, otherwise any comments buffered
        # from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        self.validate_expression(instance)
        return instance

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the buffered comments of the previous token onto the expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an earlier index (implemented as a negative advance).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wraps an unparsed statement as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown object kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action clause.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching on the current token."""
        if self._curr is None:
            return None

        if
        self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement keyword: try a bare expression, else a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses a DROP statement; falls back to a Command for unknown object kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; short-circuits so tokens are only consumed on a full match prefix.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parses CREATE [OR REPLACE] <kind> ... statements, collecting properties at
        every location where a dialect may place them."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...) -- consume TABLE so FUNCTION drives the parse.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name; each is
        # consumed (and recorded) only if present.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers for this property.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property, or returns None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic key=value property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        """Parses STORED [AS] <format> (including Hive INPUTFORMAT/OUTPUTFORMAT)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Parses [= | AS] <field> into the given property expression class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
        """Parses consecutive properties into a Properties node, or returns None."""
        properties = []

        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        # Teradata [NO] FALLBACK [PROTECTION].
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE is a table property when it directly follows CREATE [OR REPLACE]
        # [UNIQUE]; otherwise it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the various WITH ... property forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        # WITH JOURNAL TABLE = <table>.
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.Expression:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None if neither ON nor OFF matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        # CLUSTER must be followed by BY; otherwise back up and let another rule try.
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT].
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        # MERGEBLOCKRATIO = <number> [PERCENT], or the bare NO/DEFAULT form.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        return self.expression(
            exp.MergeBlockRatioProperty,
            no=no,
            default=default,
        )

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.Expression:
        # [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [= <size> [BYTES | KBYTES | KILOBYTES]].
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT | AUTOTEMP(<schema>).
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE].
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
1513 ) 1514 1515 def _parse_locking(self) -> exp.Expression: 1516 if self._match(TokenType.TABLE): 1517 kind = "TABLE" 1518 elif self._match(TokenType.VIEW): 1519 kind = "VIEW" 1520 elif self._match(TokenType.ROW): 1521 kind = "ROW" 1522 elif self._match_text_seq("DATABASE"): 1523 kind = "DATABASE" 1524 else: 1525 kind = None 1526 1527 if kind in ("DATABASE", "TABLE", "VIEW"): 1528 this = self._parse_table_parts() 1529 else: 1530 this = None 1531 1532 if self._match(TokenType.FOR): 1533 for_or_in = "FOR" 1534 elif self._match(TokenType.IN): 1535 for_or_in = "IN" 1536 else: 1537 for_or_in = None 1538 1539 if self._match_text_seq("ACCESS"): 1540 lock_type = "ACCESS" 1541 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1542 lock_type = "EXCLUSIVE" 1543 elif self._match_text_seq("SHARE"): 1544 lock_type = "SHARE" 1545 elif self._match_text_seq("READ"): 1546 lock_type = "READ" 1547 elif self._match_text_seq("WRITE"): 1548 lock_type = "WRITE" 1549 elif self._match_text_seq("CHECKSUM"): 1550 lock_type = "CHECKSUM" 1551 else: 1552 lock_type = None 1553 1554 override = self._match_text_seq("OVERRIDE") 1555 1556 return self.expression( 1557 exp.LockingProperty, 1558 this=this, 1559 kind=kind, 1560 for_or_in=for_or_in, 1561 lock_type=lock_type, 1562 override=override, 1563 ) 1564 1565 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1566 if self._match(TokenType.PARTITION_BY): 1567 return self._parse_csv(self._parse_conjunction) 1568 return [] 1569 1570 def _parse_partitioned_by(self) -> exp.Expression: 1571 self._match(TokenType.EQ) 1572 return self.expression( 1573 exp.PartitionedByProperty, 1574 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1575 ) 1576 1577 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1578 if self._match_text_seq("AND", "STATISTICS"): 1579 statistics = True 1580 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1581 statistics = False 1582 else: 1583 statistics = None 1584 1585 return 
    def _parse_no_property(self) -> t.Optional[exp.Property]:
        """Parse the property following NO; currently only NO PRIMARY INDEX."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Property]:
        """Parse ON COMMIT PRESERVE ROWS / ON COMMIT DELETE ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.Expression:
        """Parse DISTKEY(<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # A dangling INCLUDING/EXCLUDING with no option name is malformed.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse [COMPOUND] SORTKEY(<identifiers>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE (...), or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (angle-bracket struct syntax).
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        """Parse DESCRIBE [<creatable kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (the INSERT token is already consumed).

        Handles INSERT OVERWRITE, INSERT [LOCAL] DIRECTORY, and
        INSERT OR <alternative> (e.g. SQLite's OR REPLACE/IGNORE).
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <columns> clause; None if absent."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse a row format clause where ROW was already consumed (expects FORMAT next)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive ROW FORMAT SERDE / ROW FORMAT DELIMITED clause.

        Args:
            match_row: require and consume the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.Expression:
        """Parse Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise fall
        back to treating the statement as an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement (DELETE token already consumed)."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement (UPDATE token already consumed)."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse Spark's CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
self._parse_string() 1825 options = [k, v] 1826 self._match_r_paren() 1827 1828 self._match(TokenType.ALIAS) 1829 return self.expression( 1830 exp.Cache, 1831 this=table, 1832 lazy=lazy, 1833 options=options, 1834 expression=self._parse_select(nested=True), 1835 ) 1836 1837 def _parse_partition(self) -> t.Optional[exp.Expression]: 1838 if not self._match(TokenType.PARTITION): 1839 return None 1840 1841 return self.expression( 1842 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1843 ) 1844 1845 def _parse_value(self) -> exp.Expression: 1846 if self._match(TokenType.L_PAREN): 1847 expressions = self._parse_csv(self._parse_conjunction) 1848 self._match_r_paren() 1849 return self.expression(exp.Tuple, expressions=expressions) 1850 1851 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1852 # Source: https://prestodb.io/docs/current/sql/values.html 1853 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1854 1855 def _parse_select( 1856 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1857 ) -> t.Optional[exp.Expression]: 1858 cte = self._parse_with() 1859 if cte: 1860 this = self._parse_statement() 1861 1862 if not this: 1863 self.raise_error("Failed to parse any statement following CTE") 1864 return cte 1865 1866 if "with" in this.arg_types: 1867 this.set("with", cte) 1868 else: 1869 self.raise_error(f"{this.key} does not support CTE") 1870 this = cte 1871 elif self._match(TokenType.SELECT): 1872 comments = self._prev_comments 1873 1874 hint = self._parse_hint() 1875 all_ = self._match(TokenType.ALL) 1876 distinct = self._match(TokenType.DISTINCT) 1877 1878 kind = ( 1879 self._match(TokenType.ALIAS) 1880 and self._match_texts(("STRUCT", "VALUE")) 1881 and self._prev.text 1882 ) 1883 1884 if distinct: 1885 distinct = self.expression( 1886 exp.Distinct, 1887 on=self._parse_value() if self._match(TokenType.ON) else None, 1888 ) 1889 1890 if all_ and distinct: 1891 
self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1892 1893 limit = self._parse_limit(top=True) 1894 expressions = self._parse_csv(self._parse_expression) 1895 1896 this = self.expression( 1897 exp.Select, 1898 kind=kind, 1899 hint=hint, 1900 distinct=distinct, 1901 expressions=expressions, 1902 limit=limit, 1903 ) 1904 this.comments = comments 1905 1906 into = self._parse_into() 1907 if into: 1908 this.set("into", into) 1909 1910 from_ = self._parse_from() 1911 if from_: 1912 this.set("from", from_) 1913 1914 this = self._parse_query_modifiers(this) 1915 elif (table or nested) and self._match(TokenType.L_PAREN): 1916 if self._match(TokenType.PIVOT): 1917 this = self._parse_simplified_pivot() 1918 elif self._match(TokenType.FROM): 1919 this = exp.select("*").from_( 1920 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1921 ) 1922 else: 1923 this = self._parse_table() if table else self._parse_select(nested=True) 1924 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1925 1926 self._match_r_paren() 1927 1928 # early return so that subquery unions aren't parsed again 1929 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1930 # Union ALL should be a property of the top select node, not the subquery 1931 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1932 elif self._match(TokenType.VALUES): 1933 this = self.expression( 1934 exp.Values, 1935 expressions=self._parse_csv(self._parse_value), 1936 alias=self._parse_table_alias(), 1937 ) 1938 else: 1939 this = None 1940 1941 return self._parse_set_operations(this) 1942 1943 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1944 if not skip_with_token and not self._match(TokenType.WITH): 1945 return None 1946 1947 comments = self._prev_comments 1948 recursive = self._match(TokenType.RECURSIVE) 1949 1950 expressions = [] 1951 while True: 1952 expressions.append(self._parse_cte()) 1953 1954 if not self._match(TokenType.COMMA) and 
not self._match(TokenType.WITH): 1955 break 1956 else: 1957 self._match(TokenType.WITH) 1958 1959 return self.expression( 1960 exp.With, comments=comments, expressions=expressions, recursive=recursive 1961 ) 1962 1963 def _parse_cte(self) -> exp.Expression: 1964 alias = self._parse_table_alias() 1965 if not alias or not alias.this: 1966 self.raise_error("Expected CTE to have alias") 1967 1968 self._match(TokenType.ALIAS) 1969 1970 return self.expression( 1971 exp.CTE, 1972 this=self._parse_wrapped(self._parse_statement), 1973 alias=alias, 1974 ) 1975 1976 def _parse_table_alias( 1977 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1978 ) -> t.Optional[exp.Expression]: 1979 any_token = self._match(TokenType.ALIAS) 1980 alias = ( 1981 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1982 or self._parse_string_as_identifier() 1983 ) 1984 1985 index = self._index 1986 if self._match(TokenType.L_PAREN): 1987 columns = self._parse_csv(self._parse_function_parameter) 1988 self._match_r_paren() if columns else self._retreat(index) 1989 else: 1990 columns = None 1991 1992 if not alias and not columns: 1993 return None 1994 1995 return self.expression(exp.TableAlias, this=alias, columns=columns) 1996 1997 def _parse_subquery( 1998 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1999 ) -> t.Optional[exp.Expression]: 2000 if not this: 2001 return None 2002 return self.expression( 2003 exp.Subquery, 2004 this=this, 2005 pivots=self._parse_pivots(), 2006 alias=self._parse_table_alias() if parse_alias else None, 2007 ) 2008 2009 def _parse_query_modifiers( 2010 self, this: t.Optional[exp.Expression] 2011 ) -> t.Optional[exp.Expression]: 2012 if isinstance(this, self.MODIFIABLES): 2013 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2014 expression = parser(self) 2015 2016 if expression: 2017 this.set(key, expression) 2018 return this 2019 2020 def _parse_hint(self) -> t.Optional[exp.Expression]: 2021 
if self._match(TokenType.HINT): 2022 hints = self._parse_csv(self._parse_function) 2023 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2024 self.raise_error("Expected */ after HINT") 2025 return self.expression(exp.Hint, expressions=hints) 2026 2027 return None 2028 2029 def _parse_into(self) -> t.Optional[exp.Expression]: 2030 if not self._match(TokenType.INTO): 2031 return None 2032 2033 temp = self._match(TokenType.TEMPORARY) 2034 unlogged = self._match_text_seq("UNLOGGED") 2035 self._match(TokenType.TABLE) 2036 2037 return self.expression( 2038 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2039 ) 2040 2041 def _parse_from( 2042 self, modifiers: bool = False, skip_from_token: bool = False 2043 ) -> t.Optional[exp.From]: 2044 if not skip_from_token and not self._match(TokenType.FROM): 2045 return None 2046 2047 comments = self._prev_comments 2048 this = self._parse_table() 2049 2050 return self.expression( 2051 exp.From, 2052 comments=comments, 2053 this=self._parse_query_modifiers(this) if modifiers else this, 2054 ) 2055 2056 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2057 if not self._match(TokenType.MATCH_RECOGNIZE): 2058 return None 2059 2060 self._match_l_paren() 2061 2062 partition = self._parse_partition_by() 2063 order = self._parse_order() 2064 measures = ( 2065 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2066 ) 2067 2068 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2069 rows = exp.Var(this="ONE ROW PER MATCH") 2070 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2071 text = "ALL ROWS PER MATCH" 2072 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2073 text += f" SHOW EMPTY MATCHES" 2074 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2075 text += f" OMIT EMPTY MATCHES" 2076 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2077 text += f" WITH UNMATCHED ROWS" 2078 rows = exp.Var(this=text) 2079 else: 2080 
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] [OUTER] ... or OUTER/CROSS APPLY ...; None if absent."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function/identifier chain.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # Hive LATERAL VIEW: `<table> AS <col>, <col>, ...`
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens preceding JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN (comma join, qualified JOIN, or OUTER/CROSS APPLY); None if absent."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The modifier tokens weren't followed by JOIN; undo their consumption.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an index definition.

        Args:
            index: an already-parsed index name; when given, parse the ON <table>
                form, otherwise parse [UNIQUE] [PRIMARY] [AMP] INDEX <name>.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")
            if not self._match(TokenType.INDEX):
                return None
            index = self._parse_id_var()
            table = None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function/id/string/placeholder)."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse [catalog.][db.]table, allowing deeper dot-nesting beyond three parts."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a table name
        with optional alias, pivots, hints, and TABLESAMPLE."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: some dialects put the alias after TABLESAMPLE.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
this) 2353 this = table_sample 2354 2355 return this 2356 2357 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2358 if not self._match(TokenType.UNNEST): 2359 return None 2360 2361 expressions = self._parse_wrapped_csv(self._parse_type) 2362 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2363 alias = self._parse_table_alias() 2364 2365 if alias and self.unnest_column_only: 2366 if alias.args.get("columns"): 2367 self.raise_error("Unexpected extra column alias in unnest.") 2368 alias.set("columns", [alias.this]) 2369 alias.set("this", None) 2370 2371 offset = None 2372 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2373 self._match(TokenType.ALIAS) 2374 offset = self._parse_id_var() or exp.Identifier(this="offset") 2375 2376 return self.expression( 2377 exp.Unnest, 2378 expressions=expressions, 2379 ordinality=ordinality, 2380 alias=alias, 2381 offset=offset, 2382 ) 2383 2384 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2385 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2386 if not is_derived and not self._match(TokenType.VALUES): 2387 return None 2388 2389 expressions = self._parse_csv(self._parse_value) 2390 2391 if is_derived: 2392 self._match_r_paren() 2393 2394 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2395 2396 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2397 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2398 as_modifier and self._match_text_seq("USING", "SAMPLE") 2399 ): 2400 return None 2401 2402 bucket_numerator = None 2403 bucket_denominator = None 2404 bucket_field = None 2405 percent = None 2406 rows = None 2407 size = None 2408 seed = None 2409 2410 kind = ( 2411 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2412 ) 2413 method = self._parse_var(tokens=(TokenType.ROW,)) 2414 2415 self._match(TokenType.L_PAREN) 2416 2417 num = 
self._parse_number() 2418 2419 if self._match_text_seq("BUCKET"): 2420 bucket_numerator = self._parse_number() 2421 self._match_text_seq("OUT", "OF") 2422 bucket_denominator = bucket_denominator = self._parse_number() 2423 self._match(TokenType.ON) 2424 bucket_field = self._parse_field() 2425 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2426 percent = num 2427 elif self._match(TokenType.ROWS): 2428 rows = num 2429 else: 2430 size = num 2431 2432 self._match(TokenType.R_PAREN) 2433 2434 if self._match(TokenType.L_PAREN): 2435 method = self._parse_var() 2436 seed = self._match(TokenType.COMMA) and self._parse_number() 2437 self._match_r_paren() 2438 elif self._match_texts(("SEED", "REPEATABLE")): 2439 seed = self._parse_wrapped(self._parse_number) 2440 2441 return self.expression( 2442 exp.TableSample, 2443 method=method, 2444 bucket_numerator=bucket_numerator, 2445 bucket_denominator=bucket_denominator, 2446 bucket_field=bucket_field, 2447 percent=percent, 2448 rows=rows, 2449 size=size, 2450 seed=seed, 2451 kind=kind, 2452 ) 2453 2454 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2455 return list(iter(self._parse_pivot, None)) 2456 2457 # https://duckdb.org/docs/sql/statements/pivot 2458 def _parse_simplified_pivot(self) -> exp.Pivot: 2459 def _parse_on() -> t.Optional[exp.Expression]: 2460 this = self._parse_bitwise() 2461 return self._parse_in(this) if self._match(TokenType.IN) else this 2462 2463 this = self._parse_table() 2464 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2465 using = self._match(TokenType.USING) and self._parse_csv( 2466 lambda: self._parse_alias(self._parse_function()) 2467 ) 2468 group = self._parse_group() 2469 return self.expression( 2470 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2471 ) 2472 2473 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2474 index = self._index 2475 2476 if self._match(TokenType.PIVOT): 2477 unpivot = False 2478 elif 
self._match(TokenType.UNPIVOT): 2479 unpivot = True 2480 else: 2481 return None 2482 2483 expressions = [] 2484 field = None 2485 2486 if not self._match(TokenType.L_PAREN): 2487 self._retreat(index) 2488 return None 2489 2490 if unpivot: 2491 expressions = self._parse_csv(self._parse_column) 2492 else: 2493 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2494 2495 if not expressions: 2496 self.raise_error("Failed to parse PIVOT's aggregation list") 2497 2498 if not self._match(TokenType.FOR): 2499 self.raise_error("Expecting FOR") 2500 2501 value = self._parse_column() 2502 2503 if not self._match(TokenType.IN): 2504 self.raise_error("Expecting IN") 2505 2506 field = self._parse_in(value, alias=True) 2507 2508 self._match_r_paren() 2509 2510 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2511 2512 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2513 pivot.set("alias", self._parse_table_alias()) 2514 2515 if not unpivot: 2516 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2517 2518 columns: t.List[exp.Expression] = [] 2519 for fld in pivot.args["field"].expressions: 2520 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2521 for name in names: 2522 if self.PREFIXED_PIVOT_COLUMNS: 2523 name = f"{name}_{field_name}" if name else field_name 2524 else: 2525 name = f"{field_name}_{name}" if name else field_name 2526 2527 columns.append(exp.to_identifier(name)) 2528 2529 pivot.set("columns", columns) 2530 2531 return pivot 2532 2533 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2534 return [agg.alias for agg in aggregations] 2535 2536 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2537 if not skip_where_token and not self._match(TokenType.WHERE): 2538 return None 2539 2540 return self.expression( 2541 exp.Where, 
comments=self._prev_comments, this=self._parse_conjunction() 2542 ) 2543 2544 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2545 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2546 return None 2547 2548 elements = defaultdict(list) 2549 2550 while True: 2551 expressions = self._parse_csv(self._parse_conjunction) 2552 if expressions: 2553 elements["expressions"].extend(expressions) 2554 2555 grouping_sets = self._parse_grouping_sets() 2556 if grouping_sets: 2557 elements["grouping_sets"].extend(grouping_sets) 2558 2559 rollup = None 2560 cube = None 2561 totals = None 2562 2563 with_ = self._match(TokenType.WITH) 2564 if self._match(TokenType.ROLLUP): 2565 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2566 elements["rollup"].extend(ensure_list(rollup)) 2567 2568 if self._match(TokenType.CUBE): 2569 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2570 elements["cube"].extend(ensure_list(cube)) 2571 2572 if self._match_text_seq("TOTALS"): 2573 totals = True 2574 elements["totals"] = True # type: ignore 2575 2576 if not (grouping_sets or rollup or cube or totals): 2577 break 2578 2579 return self.expression(exp.Group, **elements) # type: ignore 2580 2581 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2582 if not self._match(TokenType.GROUPING_SETS): 2583 return None 2584 2585 return self._parse_wrapped_csv(self._parse_grouping_set) 2586 2587 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2588 if self._match(TokenType.L_PAREN): 2589 grouping_set = self._parse_csv(self._parse_column) 2590 self._match_r_paren() 2591 return self.expression(exp.Tuple, expressions=grouping_set) 2592 2593 return self._parse_column() 2594 2595 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2596 if not skip_having_token and not self._match(TokenType.HAVING): 2597 return None 2598 return self.expression(exp.Having, 
this=self._parse_conjunction()) 2599 2600 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2601 if not self._match(TokenType.QUALIFY): 2602 return None 2603 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2604 2605 def _parse_order( 2606 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2607 ) -> t.Optional[exp.Expression]: 2608 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2609 return this 2610 2611 return self.expression( 2612 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2613 ) 2614 2615 def _parse_sort( 2616 self, exp_class: t.Type[exp.Expression], *texts: str 2617 ) -> t.Optional[exp.Expression]: 2618 if not self._match_text_seq(*texts): 2619 return None 2620 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2621 2622 def _parse_ordered(self) -> exp.Expression: 2623 this = self._parse_conjunction() 2624 self._match(TokenType.ASC) 2625 is_desc = self._match(TokenType.DESC) 2626 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2627 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2628 desc = is_desc or False 2629 asc = not desc 2630 nulls_first = is_nulls_first or False 2631 explicitly_null_ordered = is_nulls_first or is_nulls_last 2632 if ( 2633 not explicitly_null_ordered 2634 and ( 2635 (asc and self.null_ordering == "nulls_are_small") 2636 or (desc and self.null_ordering != "nulls_are_small") 2637 ) 2638 and self.null_ordering != "nulls_are_last" 2639 ): 2640 nulls_first = True 2641 2642 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2643 2644 def _parse_limit( 2645 self, this: t.Optional[exp.Expression] = None, top: bool = False 2646 ) -> t.Optional[exp.Expression]: 2647 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2648 limit_paren = self._match(TokenType.L_PAREN) 2649 limit_exp = self.expression( 2650 exp.Limit, this=this, expression=self._parse_number() if top 
else self._parse_term() 2651 ) 2652 2653 if limit_paren: 2654 self._match_r_paren() 2655 2656 return limit_exp 2657 2658 if self._match(TokenType.FETCH): 2659 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2660 direction = self._prev.text if direction else "FIRST" 2661 2662 count = self._parse_number() 2663 percent = self._match(TokenType.PERCENT) 2664 2665 self._match_set((TokenType.ROW, TokenType.ROWS)) 2666 2667 only = self._match_text_seq("ONLY") 2668 with_ties = self._match_text_seq("WITH", "TIES") 2669 2670 if only and with_ties: 2671 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2672 2673 return self.expression( 2674 exp.Fetch, 2675 direction=direction, 2676 count=count, 2677 percent=percent, 2678 with_ties=with_ties, 2679 ) 2680 2681 return this 2682 2683 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2684 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2685 return this 2686 2687 count = self._parse_number() 2688 self._match_set((TokenType.ROW, TokenType.ROWS)) 2689 return self.expression(exp.Offset, this=this, expression=count) 2690 2691 def _parse_locks(self) -> t.List[exp.Expression]: 2692 # Lists are invariant, so we need to use a type hint here 2693 locks: t.List[exp.Expression] = [] 2694 2695 while True: 2696 if self._match_text_seq("FOR", "UPDATE"): 2697 update = True 2698 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2699 "LOCK", "IN", "SHARE", "MODE" 2700 ): 2701 update = False 2702 else: 2703 break 2704 2705 expressions = None 2706 if self._match_text_seq("OF"): 2707 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2708 2709 wait: t.Optional[bool | exp.Expression] = None 2710 if self._match_text_seq("NOWAIT"): 2711 wait = True 2712 elif self._match_text_seq("WAIT"): 2713 wait = self._parse_primary() 2714 elif self._match_text_seq("SKIP", "LOCKED"): 2715 wait = False 2716 2717 locks.append( 2718 
self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2719 ) 2720 2721 return locks 2722 2723 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2724 if not self._match_set(self.SET_OPERATIONS): 2725 return this 2726 2727 token_type = self._prev.token_type 2728 2729 if token_type == TokenType.UNION: 2730 expression = exp.Union 2731 elif token_type == TokenType.EXCEPT: 2732 expression = exp.Except 2733 else: 2734 expression = exp.Intersect 2735 2736 return self.expression( 2737 expression, 2738 this=this, 2739 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2740 expression=self._parse_set_operations(self._parse_select(nested=True)), 2741 ) 2742 2743 def _parse_expression(self) -> t.Optional[exp.Expression]: 2744 return self._parse_alias(self._parse_conjunction()) 2745 2746 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2747 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2748 2749 def _parse_equality(self) -> t.Optional[exp.Expression]: 2750 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2751 2752 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2753 return self._parse_tokens(self._parse_range, self.COMPARISON) 2754 2755 def _parse_range(self) -> t.Optional[exp.Expression]: 2756 this = self._parse_bitwise() 2757 negate = self._match(TokenType.NOT) 2758 2759 if self._match_set(self.RANGE_PARSERS): 2760 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2761 if not expression: 2762 return this 2763 2764 this = expression 2765 elif self._match(TokenType.ISNULL): 2766 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2767 2768 # Postgres supports ISNULL and NOTNULL for conditions. 
2769 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2770 if self._match(TokenType.NOTNULL): 2771 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2772 this = self.expression(exp.Not, this=this) 2773 2774 if negate: 2775 this = self.expression(exp.Not, this=this) 2776 2777 if self._match(TokenType.IS): 2778 this = self._parse_is(this) 2779 2780 return this 2781 2782 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2783 index = self._index - 1 2784 negate = self._match(TokenType.NOT) 2785 if self._match_text_seq("DISTINCT", "FROM"): 2786 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2787 return self.expression(klass, this=this, expression=self._parse_expression()) 2788 2789 expression = self._parse_null() or self._parse_boolean() 2790 if not expression: 2791 self._retreat(index) 2792 return None 2793 2794 this = self.expression(exp.Is, this=this, expression=expression) 2795 return self.expression(exp.Not, this=this) if negate else this 2796 2797 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2798 unnest = self._parse_unnest() 2799 if unnest: 2800 this = self.expression(exp.In, this=this, unnest=unnest) 2801 elif self._match(TokenType.L_PAREN): 2802 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2803 2804 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2805 this = self.expression(exp.In, this=this, query=expressions[0]) 2806 else: 2807 this = self.expression(exp.In, this=this, expressions=expressions) 2808 2809 self._match_r_paren(this) 2810 else: 2811 this = self.expression(exp.In, this=this, field=self._parse_field()) 2812 2813 return this 2814 2815 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2816 low = self._parse_bitwise() 2817 self._match(TokenType.AND) 2818 high = self._parse_bitwise() 2819 return self.expression(exp.Between, this=this, low=low, high=high) 2820 
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE expression if an ESCAPE token follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, canonicalizing into `INTERVAL '<n>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # String already embeds the unit, e.g. '5 day' -> '5' + day
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level (TERM) operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level (FACTOR) operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, then fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse intervals, typed literals (e.g. DATE '...'), or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `TYPE 'literal'` either has a dedicated literal parser or is a cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # No type args: what looked like a type was likely an identifier; rewind
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type-size argument, e.g. the `10` in VARCHAR(10) with optional unit."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested types, arrays, timestamps with
        time zones, and intervals.

        Args:
            check_func: when True, bail out (rewind) if this could actually be a
                function call followed by a string argument.
            schema: propagated to nested type parses (schema context).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized args mean this might be a function call instead of a type
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postfix [] array notation, possibly repeated for nested arrays
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # `[` without a matching `]` pair: not a type; rewind
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket style nested types, e.g. ARRAY<INT>, STRUCT<a: INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call interpretation
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Followed by a string literal -> actually a function call; rewind fully
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name[:] type` as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if that word sequence follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (a field plus any trailing column operators)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Parse postfix column operators: brackets, ::casts, dots and JSON-style ops."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: this becomes table/db/catalog of the new column
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat, `.N`
        number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary, function call, or identifier (in that order)."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching through the various parser tables.

        Args:
            functions: overriding name -> builder map (defaults to self.FUNCTIONS).
            anonymous: when True, always build an exp.Anonymous call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens: only paren-less builtins like CURRENT_DATE qualify
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume name and opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified via a dot."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`x -> ...` / `(x, y) -> ...`), or rewind and parse a
        DISTINCT list or ordinary select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In named-argument position, the LHS column is really a parameter name
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint defs) attached to `this`.

        First speculatively tries a nested SELECT; on failure rewinds and parses
        the column list.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: type plus a run of column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) with its options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): a generated expression, not an identity
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of NOT {NULL | CASESPECIFIC}, or return None."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one [CONSTRAINT name] <kind> column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint: named (CONSTRAINT id ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint by dispatching on its leading keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE [KEY] [(columns)]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON DELETE/UPDATE actions, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes REFERENCES was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE action]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list: this is a per-column PRIMARY KEY constraint
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...
    def _parse_bracket(self, this):
        # Parses one bracketed suffix attached to `this`: `[...]` for an index,
        # slice or array literal, or `{...}` for a DuckDB struct literal.
        # Returns `this` unchanged when no opening bracket is at the cursor.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no lower bound, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # Bare `[1, 2]` or `ARRAY[1, 2]` is an array literal, not an index access.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index access: normalize the index by the dialect's array offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained brackets, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in a Slice when a colon follows it, e.g. the 1:2 in x[1:2].
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        # Operand form `CASE <expr> WHEN ...`; None for the searched form
        # `CASE WHEN <cond> ...`.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
self.raise_error("Expected END after CASE", self._prev) 3590 3591 return self._parse_window( 3592 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3593 ) 3594 3595 def _parse_if(self) -> t.Optional[exp.Expression]: 3596 if self._match(TokenType.L_PAREN): 3597 args = self._parse_csv(self._parse_conjunction) 3598 this = exp.If.from_arg_list(args) 3599 self.validate_expression(this, args) 3600 self._match_r_paren() 3601 else: 3602 index = self._index - 1 3603 condition = self._parse_conjunction() 3604 3605 if not condition: 3606 self._retreat(index) 3607 return None 3608 3609 self._match(TokenType.THEN) 3610 true = self._parse_conjunction() 3611 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3612 self._match(TokenType.END) 3613 this = self.expression(exp.If, this=condition, true=true, false=false) 3614 3615 return self._parse_window(this) 3616 3617 def _parse_extract(self) -> exp.Expression: 3618 this = self._parse_function() or self._parse_var() or self._parse_type() 3619 3620 if self._match(TokenType.FROM): 3621 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3622 3623 if not self._match(TokenType.COMMA): 3624 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3625 3626 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3627 3628 def _parse_cast(self, strict: bool) -> exp.Expression: 3629 this = self._parse_conjunction() 3630 3631 if not self._match(TokenType.ALIAS): 3632 if self._match(TokenType.COMMA): 3633 return self.expression( 3634 exp.CastToStrType, this=this, expression=self._parse_string() 3635 ) 3636 else: 3637 self.raise_error("Expected AS after CAST") 3638 3639 to = self._parse_types() 3640 3641 if not to: 3642 self.raise_error("Expected TYPE after CAST") 3643 elif to.this == exp.DataType.Type.CHAR: 3644 if self._match(TokenType.CHARACTER_SET): 3645 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3646 
3647 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3648 3649 def _parse_string_agg(self) -> exp.Expression: 3650 expression: t.Optional[exp.Expression] 3651 3652 if self._match(TokenType.DISTINCT): 3653 args = self._parse_csv(self._parse_conjunction) 3654 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3655 else: 3656 args = self._parse_csv(self._parse_conjunction) 3657 expression = seq_get(args, 0) 3658 3659 index = self._index 3660 if not self._match(TokenType.R_PAREN): 3661 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3662 order = self._parse_order(this=expression) 3663 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3664 3665 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3666 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3667 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3668 if not self._match_text_seq("WITHIN", "GROUP"): 3669 self._retreat(index) 3670 this = exp.GroupConcat.from_arg_list(args) 3671 self.validate_expression(this, args) 3672 return this 3673 3674 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3675 order = self._parse_order(this=expression) 3676 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3677 3678 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3679 to: t.Optional[exp.Expression] 3680 this = self._parse_bitwise() 3681 3682 if self._match(TokenType.USING): 3683 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3684 elif self._match(TokenType.COMMA): 3685 to = self._parse_bitwise() 3686 else: 3687 to = None 3688 3689 # Swap the argument order if needed to produce the correct AST 3690 if self.CONVERT_TYPE_FIRST: 3691 this, to = to, this 3692 3693 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3694 3695 def _parse_decode(self) -> t.Optional[exp.Expression]: 3696 """ 3697 There are generally two variants of the DECODE function: 3698 3699 - DECODE(bin, charset) 3700 - DECODE(expression, search, result [, search, result] ... [, default]) 3701 3702 The second variant will always be parsed into a CASE expression. Note that NULL 3703 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3704 instead of relying on pattern matching. 
3705 """ 3706 args = self._parse_csv(self._parse_conjunction) 3707 3708 if len(args) < 3: 3709 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3710 3711 expression, *expressions = args 3712 if not expression: 3713 return None 3714 3715 ifs = [] 3716 for search, result in zip(expressions[::2], expressions[1::2]): 3717 if not search or not result: 3718 return None 3719 3720 if isinstance(search, exp.Literal): 3721 ifs.append( 3722 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3723 ) 3724 elif isinstance(search, exp.Null): 3725 ifs.append( 3726 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3727 ) 3728 else: 3729 cond = exp.or_( 3730 exp.EQ(this=expression.copy(), expression=search), 3731 exp.and_( 3732 exp.Is(this=expression.copy(), expression=exp.Null()), 3733 exp.Is(this=search.copy(), expression=exp.Null()), 3734 copy=False, 3735 ), 3736 copy=False, 3737 ) 3738 ifs.append(exp.If(this=cond, true=result)) 3739 3740 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3741 3742 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3743 self._match_text_seq("KEY") 3744 key = self._parse_field() 3745 self._match(TokenType.COLON) 3746 self._match_text_seq("VALUE") 3747 value = self._parse_field() 3748 if not key and not value: 3749 return None 3750 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3751 3752 def _parse_json_object(self) -> exp.Expression: 3753 expressions = self._parse_csv(self._parse_json_key_value) 3754 3755 null_handling = None 3756 if self._match_text_seq("NULL", "ON", "NULL"): 3757 null_handling = "NULL ON NULL" 3758 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3759 null_handling = "ABSENT ON NULL" 3760 3761 unique_keys = None 3762 if self._match_text_seq("WITH", "UNIQUE"): 3763 unique_keys = True 3764 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3765 unique_keys = False 3766 3767 
self._match_text_seq("KEYS") 3768 3769 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3770 format_json = self._match_text_seq("FORMAT", "JSON") 3771 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3772 3773 return self.expression( 3774 exp.JSONObject, 3775 expressions=expressions, 3776 null_handling=null_handling, 3777 unique_keys=unique_keys, 3778 return_type=return_type, 3779 format_json=format_json, 3780 encoding=encoding, 3781 ) 3782 3783 def _parse_logarithm(self) -> exp.Expression: 3784 # Default argument order is base, expression 3785 args = self._parse_csv(self._parse_range) 3786 3787 if len(args) > 1: 3788 if not self.LOG_BASE_FIRST: 3789 args.reverse() 3790 return exp.Log.from_arg_list(args) 3791 3792 return self.expression( 3793 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3794 ) 3795 3796 def _parse_match_against(self) -> exp.Expression: 3797 expressions = self._parse_csv(self._parse_column) 3798 3799 self._match_text_seq(")", "AGAINST", "(") 3800 3801 this = self._parse_string() 3802 3803 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3804 modifier = "IN NATURAL LANGUAGE MODE" 3805 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3806 modifier = f"{modifier} WITH QUERY EXPANSION" 3807 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3808 modifier = "IN BOOLEAN MODE" 3809 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3810 modifier = "WITH QUERY EXPANSION" 3811 else: 3812 modifier = None 3813 3814 return self.expression( 3815 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3816 ) 3817 3818 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3819 def _parse_open_json(self) -> exp.Expression: 3820 this = self._parse_bitwise() 3821 path = self._match(TokenType.COMMA) and self._parse_string() 3822 3823 def _parse_open_json_column_def() -> exp.Expression: 3824 this = 
self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            # T-SQL: `AS JSON` marks the column as holding a JSON fragment.
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            # Explicit schema: OPENJSON(...) WITH (<column definitions>).
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse a POSITION/LOCATE-style call into exp.StrPosition.

        Args:
            haystack_first: in the comma-separated form, whether the first
                argument is the searched string rather than the substring.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(<substr> IN <string>).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        # A join hint such as BROADCAST(t1, t2): the arguments are table refs.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            # FOR is only valid after FROM; append length as the third arg.
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        #
https://www.w3resource.com/sql/character-functions/trim.php 3882 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3883 3884 position = None 3885 collation = None 3886 3887 if self._match_texts(self.TRIM_TYPES): 3888 position = self._prev.text.upper() 3889 3890 expression = self._parse_bitwise() 3891 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3892 this = self._parse_bitwise() 3893 else: 3894 this = expression 3895 expression = None 3896 3897 if self._match(TokenType.COLLATE): 3898 collation = self._parse_bitwise() 3899 3900 return self.expression( 3901 exp.Trim, 3902 this=this, 3903 position=position, 3904 expression=expression, 3905 collation=collation, 3906 ) 3907 3908 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3909 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3910 3911 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3912 return self._parse_window(self._parse_id_var(), alias=True) 3913 3914 def _parse_respect_or_ignore_nulls( 3915 self, this: t.Optional[exp.Expression] 3916 ) -> t.Optional[exp.Expression]: 3917 if self._match_text_seq("IGNORE", "NULLS"): 3918 return self.expression(exp.IgnoreNulls, this=this) 3919 if self._match_text_seq("RESPECT", "NULLS"): 3920 return self.expression(exp.RespectNulls, this=this) 3921 return this 3922 3923 def _parse_window( 3924 self, this: t.Optional[exp.Expression], alias: bool = False 3925 ) -> t.Optional[exp.Expression]: 3926 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3927 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3928 self._match_r_paren() 3929 3930 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3931 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3932 if self._match_text_seq("WITHIN", "GROUP"): 3933 order = self._parse_wrapped(self._parse_order) 3934 this = self.expression(exp.WithinGroup, this=this, expression=order) 3935 3936 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3937 # Some dialects choose to implement and some do not. 3938 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3939 3940 # There is some code above in _parse_lambda that handles 3941 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3942 3943 # The below changes handle 3944 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3945 3946 # Oracle allows both formats 3947 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3948 # and Snowflake chose to do the same for familiarity 3949 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3950 this = self._parse_respect_or_ignore_nulls(this) 3951 3952 # bigquery select from window x AS (partition by ...) 
3953 if alias: 3954 over = None 3955 self._match(TokenType.ALIAS) 3956 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3957 return this 3958 else: 3959 over = self._prev.text.upper() 3960 3961 if not self._match(TokenType.L_PAREN): 3962 return self.expression( 3963 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3964 ) 3965 3966 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3967 3968 first = self._match(TokenType.FIRST) 3969 if self._match_text_seq("LAST"): 3970 first = False 3971 3972 partition = self._parse_partition_by() 3973 order = self._parse_order() 3974 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3975 3976 if kind: 3977 self._match(TokenType.BETWEEN) 3978 start = self._parse_window_spec() 3979 self._match(TokenType.AND) 3980 end = self._parse_window_spec() 3981 3982 spec = self.expression( 3983 exp.WindowSpec, 3984 kind=kind, 3985 start=start["value"], 3986 start_side=start["side"], 3987 end=end["value"], 3988 end_side=end["side"], 3989 ) 3990 else: 3991 spec = None 3992 3993 self._match_r_paren() 3994 3995 return self.expression( 3996 exp.Window, 3997 this=this, 3998 partition_by=partition, 3999 order=order, 4000 spec=spec, 4001 alias=window_alias, 4002 over=over, 4003 first=first, 4004 ) 4005 4006 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4007 self._match(TokenType.BETWEEN) 4008 4009 return { 4010 "value": ( 4011 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4012 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4013 or self._parse_bitwise() 4014 ), 4015 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4016 } 4017 4018 def _parse_alias( 4019 self, this: t.Optional[exp.Expression], explicit: bool = False 4020 ) -> t.Optional[exp.Expression]: 4021 any_token = self._match(TokenType.ALIAS) 4022 4023 if explicit and not any_token: 4024 return this 4025 4026 if 
self._match(TokenType.L_PAREN): 4027 aliases = self.expression( 4028 exp.Aliases, 4029 this=this, 4030 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4031 ) 4032 self._match_r_paren(aliases) 4033 return aliases 4034 4035 alias = self._parse_id_var(any_token) 4036 4037 if alias: 4038 return self.expression(exp.Alias, this=this, alias=alias) 4039 4040 return this 4041 4042 def _parse_id_var( 4043 self, 4044 any_token: bool = True, 4045 tokens: t.Optional[t.Collection[TokenType]] = None, 4046 ) -> t.Optional[exp.Expression]: 4047 identifier = self._parse_identifier() 4048 4049 if identifier: 4050 return identifier 4051 4052 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4053 quoted = self._prev.token_type == TokenType.STRING 4054 return exp.Identifier(this=self._prev.text, quoted=quoted) 4055 4056 return None 4057 4058 def _parse_string(self) -> t.Optional[exp.Expression]: 4059 if self._match(TokenType.STRING): 4060 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4061 return self._parse_placeholder() 4062 4063 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 4064 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4065 4066 def _parse_number(self) -> t.Optional[exp.Expression]: 4067 if self._match(TokenType.NUMBER): 4068 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4069 return self._parse_placeholder() 4070 4071 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4072 if self._match(TokenType.IDENTIFIER): 4073 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4074 return self._parse_placeholder() 4075 4076 def _parse_var( 4077 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4078 ) -> t.Optional[exp.Expression]: 4079 if ( 4080 (any_token and self._advance_any()) 4081 or self._match(TokenType.VAR) 4082 or (self._match_set(tokens) if tokens else False) 
4083 ): 4084 return self.expression(exp.Var, this=self._prev.text) 4085 return self._parse_placeholder() 4086 4087 def _advance_any(self) -> t.Optional[Token]: 4088 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4089 self._advance() 4090 return self._prev 4091 return None 4092 4093 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4094 return self._parse_var() or self._parse_string() 4095 4096 def _parse_null(self) -> t.Optional[exp.Expression]: 4097 if self._match(TokenType.NULL): 4098 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4099 return None 4100 4101 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4102 if self._match(TokenType.TRUE): 4103 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4104 if self._match(TokenType.FALSE): 4105 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4106 return None 4107 4108 def _parse_star(self) -> t.Optional[exp.Expression]: 4109 if self._match(TokenType.STAR): 4110 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4111 return None 4112 4113 def _parse_parameter(self) -> exp.Expression: 4114 wrapped = self._match(TokenType.L_BRACE) 4115 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4116 self._match(TokenType.R_BRACE) 4117 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4118 4119 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4120 if self._match_set(self.PLACEHOLDER_PARSERS): 4121 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4122 if placeholder: 4123 return placeholder 4124 self._advance(-1) 4125 return None 4126 4127 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4128 if not self._match(TokenType.EXCEPT): 4129 return None 4130 if self._match(TokenType.L_PAREN, advance=False): 4131 return self._parse_wrapped_csv(self._parse_column) 4132 return self._parse_csv(self._parse_column) 4133 4134 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4135 if not self._match(TokenType.REPLACE): 4136 return None 4137 if self._match(TokenType.L_PAREN, advance=False): 4138 return self._parse_wrapped_csv(self._parse_expression) 4139 return self._parse_csv(self._parse_expression) 4140 4141 def _parse_csv( 4142 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4143 ) -> t.List[t.Optional[exp.Expression]]: 4144 parse_result = parse_method() 4145 items = [parse_result] if parse_result is not None else [] 4146 4147 while self._match(sep): 4148 self._add_comments(parse_result) 4149 parse_result = parse_method() 4150 if parse_result is not None: 4151 items.append(parse_result) 4152 4153 return items 4154 4155 def _parse_tokens( 4156 self, parse_method: t.Callable, expressions: t.Dict 4157 ) -> t.Optional[exp.Expression]: 4158 this = parse_method() 4159 4160 while self._match_set(expressions): 4161 this = self.expression( 4162 expressions[self._prev.token_type], 4163 this=this, 4164 comments=self._prev_comments, 4165 expression=parse_method(), 4166 ) 4167 4168 return this 4169 4170 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4171 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4172 4173 def _parse_wrapped_csv( 4174 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4175 ) -> t.List[t.Optional[exp.Expression]]: 4176 return self._parse_wrapped( 4177 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4178 ) 4179 4180 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4181 wrapped = self._match(TokenType.L_PAREN) 4182 if not wrapped and not optional: 4183 self.raise_error("Expecting (") 4184 parse_result = parse_method() 4185 if wrapped: 4186 self._match_r_paren() 4187 return parse_result 4188 4189 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4190 return 
self._parse_select() or self._parse_set_operations( 4191 self._parse_expression() if alias else self._parse_conjunction() 4192 ) 4193 4194 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4195 return self._parse_query_modifiers( 4196 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4197 ) 4198 4199 def _parse_transaction(self) -> exp.Expression: 4200 this = None 4201 if self._match_texts(self.TRANSACTION_KIND): 4202 this = self._prev.text 4203 4204 self._match_texts({"TRANSACTION", "WORK"}) 4205 4206 modes = [] 4207 while True: 4208 mode = [] 4209 while self._match(TokenType.VAR): 4210 mode.append(self._prev.text) 4211 4212 if mode: 4213 modes.append(" ".join(mode)) 4214 if not self._match(TokenType.COMMA): 4215 break 4216 4217 return self.expression(exp.Transaction, this=this, modes=modes) 4218 4219 def _parse_commit_or_rollback(self) -> exp.Expression: 4220 chain = None 4221 savepoint = None 4222 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4223 4224 self._match_texts({"TRANSACTION", "WORK"}) 4225 4226 if self._match_text_seq("TO"): 4227 self._match_text_seq("SAVEPOINT") 4228 savepoint = self._parse_id_var() 4229 4230 if self._match(TokenType.AND): 4231 chain = not self._match_text_seq("NO") 4232 self._match_text_seq("CHAIN") 4233 4234 if is_rollback: 4235 return self.expression(exp.Rollback, savepoint=savepoint) 4236 return self.expression(exp.Commit, chain=chain) 4237 4238 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4239 if not self._match_text_seq("ADD"): 4240 return None 4241 4242 self._match(TokenType.COLUMN) 4243 exists_column = self._parse_exists(not_=True) 4244 expression = self._parse_column_def(self._parse_field(any_token=True)) 4245 4246 if expression: 4247 expression.set("exists", exists_column) 4248 4249 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4250 if self._match_texts(("FIRST", "AFTER")): 4251 position = self._prev.text 
4252 column_position = self.expression( 4253 exp.ColumnPosition, this=self._parse_column(), position=position 4254 ) 4255 expression.set("position", column_position) 4256 4257 return expression 4258 4259 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4260 drop = self._match(TokenType.DROP) and self._parse_drop() 4261 if drop and not isinstance(drop, exp.Command): 4262 drop.set("kind", drop.args.get("kind", "COLUMN")) 4263 return drop 4264 4265 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4266 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4267 return self.expression( 4268 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4269 ) 4270 4271 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4272 this = None 4273 kind = self._prev.token_type 4274 4275 if kind == TokenType.CONSTRAINT: 4276 this = self._parse_id_var() 4277 4278 if self._match_text_seq("CHECK"): 4279 expression = self._parse_wrapped(self._parse_conjunction) 4280 enforced = self._match_text_seq("ENFORCED") 4281 4282 return self.expression( 4283 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4284 ) 4285 4286 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4287 expression = self._parse_foreign_key() 4288 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4289 expression = self._parse_primary_key() 4290 else: 4291 expression = None 4292 4293 return self.expression(exp.AddConstraint, this=this, expression=expression) 4294 4295 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4296 index = self._index - 1 4297 4298 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4299 return self._parse_csv(self._parse_add_constraint) 4300 4301 self._retreat(index) 4302 return self._parse_csv(self._parse_add_column) 4303 4304 def _parse_alter_table_alter(self) -> exp.Expression: 4305 
self._match(TokenType.COLUMN) 4306 column = self._parse_field(any_token=True) 4307 4308 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4309 return self.expression(exp.AlterColumn, this=column, drop=True) 4310 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4311 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4312 4313 self._match_text_seq("SET", "DATA") 4314 return self.expression( 4315 exp.AlterColumn, 4316 this=column, 4317 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4318 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4319 using=self._match(TokenType.USING) and self._parse_conjunction(), 4320 ) 4321 4322 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4323 index = self._index - 1 4324 4325 partition_exists = self._parse_exists() 4326 if self._match(TokenType.PARTITION, advance=False): 4327 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4328 4329 self._retreat(index) 4330 return self._parse_csv(self._parse_drop_column) 4331 4332 def _parse_alter_table_rename(self) -> exp.Expression: 4333 self._match_text_seq("TO") 4334 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4335 4336 def _parse_alter(self) -> t.Optional[exp.Expression]: 4337 start = self._prev 4338 4339 if not self._match(TokenType.TABLE): 4340 return self._parse_as_command(start) 4341 4342 exists = self._parse_exists() 4343 this = self._parse_table(schema=True) 4344 4345 if self._next: 4346 self._advance() 4347 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4348 4349 if parser: 4350 actions = ensure_list(parser(self)) 4351 4352 if not self._curr: 4353 return self.expression( 4354 exp.AlterTable, 4355 this=this, 4356 exists=exists, 4357 actions=actions, 4358 ) 4359 return self._parse_as_command(start) 4360 4361 def _parse_merge(self) -> exp.Expression: 4362 self._match(TokenType.INTO) 4363 target 
# NOTE(review): the fragment below is the tail of `_parse_merge`; its `def`
# line and the assignment target for `= self._parse_table()` sit above this
# chunk, so the code is kept byte-identical and only commented.
    = self._parse_table()

    # MERGE INTO <target> USING <source> ON <condition>
    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    whens = []
    # Each WHEN [NOT] MATCHED [BY TARGET|BY SOURCE] [AND <cond>] THEN <action>
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # source is False for BY TARGET, True for BY SOURCE, and False (the
        # result of the failed match) when neither qualifier is present.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            # INSERT * or INSERT (<cols>) VALUES (<values>)
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match(TokenType.VALUES) and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            # UPDATE * or UPDATE SET <assignments>
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=whens,
    )

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Parse a SHOW statement via a registered SHOW parser, falling back to a
    generic `exp.Show` node built from the next token's upper-cased text."""
    parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
    if parser:
        return parser(self)
    self._advance()
    return self.expression(exp.Show, this=self._prev.text.upper())

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse one `<name> = <value>` / `<name> TO <value>` SET item.

    Args:
        kind: optional item qualifier (e.g. "GLOBAL" or "SESSION").

    Returns:
        An `exp.SetItem`, or None (after rewinding) when no assignment follows.
    """
    index = self._index

    # GLOBAL/SESSION TRANSACTION is a transaction-characteristics item,
    # not an assignment.
    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()

    if not self._match_texts(("=", "TO")):
        # Not an assignment; rewind so the caller can try something else.
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(
        exp.EQ,
        this=left,
        expression=right,
    )

    return self.expression(
        exp.SetItem,
        this=this,
        kind=kind,
    )

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse SET [GLOBAL] TRANSACTION <characteristic> [, ...] into a SetItem."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # "global" is a Python keyword, hence the dict expansion.
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item, preferring a registered SET parser over a plain
    assignment."""
    parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self) -> exp.Expression:
    """Parse a SET statement; if tokens remain unconsumed, rewind and fall
    back to treating the whole statement as a raw Command."""
    index = self._index
    set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
    """Return an `exp.Var` for the first multi-word option that matches the
    upcoming tokens, or None when none match."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.Var(this=option)
    return None

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and wrap them in an `exp.Command`, with the
    leading keyword in `this` and the rest in `expression`."""
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse `<this>(<kind>[(<key> <value>, ...)])` into an `exp.DictProperty`."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse `(MIN <min> MAX <max>)` or `(<max>)` into an `exp.DictRange`;
    when MIN is absent, min defaults to the literal 0."""
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        # NOTE: `min`/`max` shadow builtins; kept for byte-identical code.
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Walk `trie` with upcoming token text to find a registered parser.

    Returns the matching callable from `parsers`, or None (after rewinding)
    when no full key matches.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)
        self._advance()
        result, trie = in_trie(trie, key)
        if result == 0:
            # Dead end in the trie: no key starts with what we've seen.
            break
        if result == 2:
            # Full key matched.
            subparser = parsers[" ".join(this)]
            return subparser
    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Consume the current token when it has `token_type`.

    Returns True on a match (attaching pending comments to `expression`
    when advancing), otherwise None.
    """
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None
def _match_set(self, types, advance=True):
    """Consume the current token when its type is in `types`; True on match, else None."""
    if not self._curr or self._curr.token_type not in types:
        return None
    if advance:
        self._advance()
    return True

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Consume the next two tokens when they match the given pair of types."""
    if not self._curr or not self._next:
        return None
    if self._curr.token_type != token_type_a or self._next.token_type != token_type_b:
        return None
    if advance:
        self._advance(2)
    return True

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require an opening parenthesis, raising a parse error when it's absent."""
    self._match(TokenType.L_PAREN, expression=expression) or self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require a closing parenthesis, raising a parse error when it's absent."""
    self._match(TokenType.R_PAREN, expression=expression) or self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Consume the current token when its upper-cased text is in `texts`."""
    if not (self._curr and self._curr.text.upper() in texts):
        return False
    if advance:
        self._advance()
    return True

def _match_text_seq(self, *texts, advance=True):
    """Consume a run of tokens whose upper-cased texts equal `texts` in order.

    Rewinds and returns False on the first mismatch; also rewinds on success
    when `advance` is False (peek mode).
    """
    checkpoint = self._index
    for text in texts:
        if not (self._curr and self._curr.text.upper() == text):
            self._retreat(checkpoint)
            return False
        self._advance()

    if not advance:
        self._retreat(checkpoint)

    return True

@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...
def _replace_columns_with_dots(self, this):
    """Recursively rewrite Column nodes into Dot/Var nodes.

    A qualified column `t.c` becomes `Dot(t, c)`; an unqualified column or a
    bare identifier becomes a `Var` carrying its name. Children of Dot and
    Column nodes are rewritten in place first.
    """
    if isinstance(this, exp.Dot):
        exp.replace_children(this, self._replace_columns_with_dots)
    elif isinstance(this, exp.Column):
        exp.replace_children(this, self._replace_columns_with_dots)
        table = this.args.get("table")
        this = (
            self.expression(exp.Dot, this=table, expression=this.this)
            if table
            else self.expression(exp.Var, this=this.name)
        )
    elif isinstance(this, exp.Identifier):
        this = self.expression(exp.Var, this=this.name)

    return this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Replace column references to lambda parameters inside a lambda body.

    Columns whose first part names a lambda variable are replaced by a Dot
    (when table-qualified) or by their inner identifier. When the column sits
    under a chain of Dot nodes, the outermost Dot is replaced instead; when
    the column IS the root `node`, the return value is swapped rather than
    replaced in place.
    """
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name in lambda_variables:
            dot_or_id = column.to_dot() if column.table else column.this
            parent = column.parent

            # Climb to the outermost Dot so `x.a.b` is replaced as a whole.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                # No enclosing Dot chain (for-else: loop never broke).
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    index_offset: int = 0,
    unnest_column_only: bool = False,
    alias_post_tablesample: bool = False,
    max_errors: int = 3,
    null_ordering: t.Optional[str] = None,
):
    """Configure the parser and initialize its mutable state via `reset()`.

    Args:
        error_level: desired error level; defaults to ErrorLevel.IMMEDIATE.
        error_message_context: number of characters of query context shown
            in error messages. Default: 100.
        index_offset: index offset for arrays (ARRAY[0] vs ARRAY[1]).
        unnest_column_only: whether UNNEST aliases name columns only.
        alias_post_tablesample: whether the table alias comes after TABLESAMPLE.
        max_errors: maximum number of error messages in a raised ParseError
            (relevant only for ErrorLevel.RAISE).
        null_ordering: default null ordering method if not explicitly set.
    """
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.index_offset = index_offset
    self.unnest_column_only = unnest_column_only
    self.alias_post_tablesample = alias_post_tablesample
    self.max_errors = max_errors
    self.null_ordering = null_ordering
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parse a list of tokens, producing one syntax tree per SQL statement.

    Args:
        raw_tokens: the list of tokens.
        sql: the original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees.
    """
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: the expression type(s) to try and parse the token list into.
        raw_tokens: the list of tokens.
        sql: the original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: if no parser is registered for a requested expression type.
        ParseError: if none of the requested types could be parsed.
    """
    errors = []
    for expression_type in ensure_collection(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")
        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Fix: when `expression_types` is empty, `errors` is empty too and the
    # original `from errors[-1]` raised IndexError, masking the ParseError.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.
    """
    if self.error_level == ErrorLevel.WARN:
        for err in self.errors:
            logger.error(str(err))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.

    The error message embeds up to `error_message_context` characters of SQL
    on each side of the offending token, with the token itself underlined
    via ANSI escape codes.
    """
    # Fall back to the current/previous token, or a dummy one, so position
    # info is always available.
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: the expression class to instantiate.
        comments: an optional list of comments to attach to the expression.
        kwargs: the arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    if comments:
        instance.add_comments(comments)
    else:
        # No explicit comments: attach any comments pending on the parser.
        self._add_comments(instance)
    self.validate_expression(instance)
    return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(
    self, expression: exp.Expression, args: t.Optional[t.List] = None
) -> None:
    """
    Validates an already instantiated expression, making sure that all its mandatory
    arguments are set.

    Args:
        expression: the expression to validate.
        args: an optional list of items that was used to instantiate the expression,
            if it's a Func.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for message in expression.error_messages(args):
            self.raise_error(message)