sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21 22def parse_var_map(args): 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34class _Parser(type): 35 def __new__(cls, clsname, bases, attrs): 36 klass = super().__new__(cls, clsname, bases, attrs) 37 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 38 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 39 return klass 40 41 42class Parser(metaclass=_Parser): 43 """ 44 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 45 a parsed syntax tree. 46 47 Args: 48 error_level: the desired error level. 49 Default: ErrorLevel.RAISE 50 error_message_context: determines the amount of context to capture from a 51 query string when displaying the error message (in number of characters). 52 Default: 50. 53 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 54 Default: 0 55 alias_post_tablesample: If the table alias comes after tablesample. 56 Default: False 57 max_errors: Maximum number of error messages to include in a raised ParseError. 58 This is only relevant if error_level is ErrorLevel.RAISE. 59 Default: 3 60 null_ordering: Indicates the default null ordering method to use if not explicitly set. 61 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 
62 Default: "nulls_are_small" 63 """ 64 65 FUNCTIONS: t.Dict[str, t.Callable] = { 66 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 67 "DATE_TO_DATE_STR": lambda args: exp.Cast( 68 this=seq_get(args, 0), 69 to=exp.DataType(this=exp.DataType.Type.TEXT), 70 ), 71 "TIME_TO_TIME_STR": lambda args: exp.Cast( 72 this=seq_get(args, 0), 73 to=exp.DataType(this=exp.DataType.Type.TEXT), 74 ), 75 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 76 this=exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 start=exp.Literal.number(1), 81 length=exp.Literal.number(10), 82 ), 83 "VAR_MAP": parse_var_map, 84 "IFNULL": exp.Coalesce.from_arg_list, 85 } 86 87 NO_PAREN_FUNCTIONS = { 88 TokenType.CURRENT_DATE: exp.CurrentDate, 89 TokenType.CURRENT_DATETIME: exp.CurrentDate, 90 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 91 } 92 93 NESTED_TYPE_TOKENS = { 94 TokenType.ARRAY, 95 TokenType.MAP, 96 TokenType.STRUCT, 97 TokenType.NULLABLE, 98 } 99 100 TYPE_TOKENS = { 101 TokenType.BOOLEAN, 102 TokenType.TINYINT, 103 TokenType.SMALLINT, 104 TokenType.INT, 105 TokenType.BIGINT, 106 TokenType.FLOAT, 107 TokenType.DOUBLE, 108 TokenType.CHAR, 109 TokenType.NCHAR, 110 TokenType.VARCHAR, 111 TokenType.NVARCHAR, 112 TokenType.TEXT, 113 TokenType.MEDIUMTEXT, 114 TokenType.LONGTEXT, 115 TokenType.MEDIUMBLOB, 116 TokenType.LONGBLOB, 117 TokenType.BINARY, 118 TokenType.VARBINARY, 119 TokenType.JSON, 120 TokenType.JSONB, 121 TokenType.INTERVAL, 122 TokenType.TIME, 123 TokenType.TIMESTAMP, 124 TokenType.TIMESTAMPTZ, 125 TokenType.TIMESTAMPLTZ, 126 TokenType.DATETIME, 127 TokenType.DATE, 128 TokenType.DECIMAL, 129 TokenType.UUID, 130 TokenType.GEOGRAPHY, 131 TokenType.GEOMETRY, 132 TokenType.HLLSKETCH, 133 TokenType.HSTORE, 134 TokenType.PSEUDO_TYPE, 135 TokenType.SUPER, 136 TokenType.SERIAL, 137 TokenType.SMALLSERIAL, 138 TokenType.BIGSERIAL, 139 TokenType.XML, 140 TokenType.UNIQUEIDENTIFIER, 141 TokenType.MONEY, 142 
TokenType.SMALLMONEY, 143 TokenType.ROWVERSION, 144 TokenType.IMAGE, 145 TokenType.VARIANT, 146 TokenType.OBJECT, 147 TokenType.INET, 148 *NESTED_TYPE_TOKENS, 149 } 150 151 SUBQUERY_PREDICATES = { 152 TokenType.ANY: exp.Any, 153 TokenType.ALL: exp.All, 154 TokenType.EXISTS: exp.Exists, 155 TokenType.SOME: exp.Any, 156 } 157 158 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 159 160 ID_VAR_TOKENS = { 161 TokenType.VAR, 162 TokenType.ANTI, 163 TokenType.APPLY, 164 TokenType.AUTO_INCREMENT, 165 TokenType.BEGIN, 166 TokenType.BOTH, 167 TokenType.BUCKET, 168 TokenType.CACHE, 169 TokenType.CASCADE, 170 TokenType.COLLATE, 171 TokenType.COLUMN, 172 TokenType.COMMAND, 173 TokenType.COMMIT, 174 TokenType.COMPOUND, 175 TokenType.CONSTRAINT, 176 TokenType.CURRENT_TIME, 177 TokenType.DEFAULT, 178 TokenType.DELETE, 179 TokenType.DESCRIBE, 180 TokenType.DIV, 181 TokenType.END, 182 TokenType.EXECUTE, 183 TokenType.ESCAPE, 184 TokenType.FALSE, 185 TokenType.FIRST, 186 TokenType.FILTER, 187 TokenType.FOLLOWING, 188 TokenType.FORMAT, 189 TokenType.FUNCTION, 190 TokenType.IF, 191 TokenType.INDEX, 192 TokenType.ISNULL, 193 TokenType.INTERVAL, 194 TokenType.LAZY, 195 TokenType.LEADING, 196 TokenType.LEFT, 197 TokenType.LOCAL, 198 TokenType.MATERIALIZED, 199 TokenType.MERGE, 200 TokenType.NATURAL, 201 TokenType.NEXT, 202 TokenType.OFFSET, 203 TokenType.ONLY, 204 TokenType.OPTIONS, 205 TokenType.ORDINALITY, 206 TokenType.PERCENT, 207 TokenType.PIVOT, 208 TokenType.PRECEDING, 209 TokenType.RANGE, 210 TokenType.REFERENCES, 211 TokenType.RIGHT, 212 TokenType.ROW, 213 TokenType.ROWS, 214 TokenType.SCHEMA, 215 TokenType.SEED, 216 TokenType.SEMI, 217 TokenType.SET, 218 TokenType.SHOW, 219 TokenType.SORTKEY, 220 TokenType.TABLE, 221 TokenType.TEMPORARY, 222 TokenType.TOP, 223 TokenType.TRAILING, 224 TokenType.TRUE, 225 TokenType.UNBOUNDED, 226 TokenType.UNIQUE, 227 TokenType.UNLOGGED, 228 TokenType.UNPIVOT, 229 TokenType.PROCEDURE, 230 TokenType.VIEW, 231 
TokenType.VOLATILE, 232 TokenType.WINDOW, 233 *SUBQUERY_PREDICATES, 234 *TYPE_TOKENS, 235 *NO_PAREN_FUNCTIONS, 236 } 237 238 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 239 TokenType.APPLY, 240 TokenType.LEFT, 241 TokenType.NATURAL, 242 TokenType.OFFSET, 243 TokenType.RIGHT, 244 TokenType.WINDOW, 245 } 246 247 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 248 249 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 250 251 FUNC_TOKENS = { 252 TokenType.COMMAND, 253 TokenType.CURRENT_DATE, 254 TokenType.CURRENT_DATETIME, 255 TokenType.CURRENT_TIMESTAMP, 256 TokenType.CURRENT_TIME, 257 TokenType.FILTER, 258 TokenType.FIRST, 259 TokenType.FORMAT, 260 TokenType.IDENTIFIER, 261 TokenType.INDEX, 262 TokenType.ISNULL, 263 TokenType.ILIKE, 264 TokenType.LIKE, 265 TokenType.MERGE, 266 TokenType.OFFSET, 267 TokenType.PRIMARY_KEY, 268 TokenType.REPLACE, 269 TokenType.ROW, 270 TokenType.UNNEST, 271 TokenType.VAR, 272 TokenType.LEFT, 273 TokenType.RIGHT, 274 TokenType.DATE, 275 TokenType.DATETIME, 276 TokenType.TABLE, 277 TokenType.TIMESTAMP, 278 TokenType.TIMESTAMPTZ, 279 TokenType.WINDOW, 280 *TYPE_TOKENS, 281 *SUBQUERY_PREDICATES, 282 } 283 284 CONJUNCTION = { 285 TokenType.AND: exp.And, 286 TokenType.OR: exp.Or, 287 } 288 289 EQUALITY = { 290 TokenType.EQ: exp.EQ, 291 TokenType.NEQ: exp.NEQ, 292 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 293 } 294 295 COMPARISON = { 296 TokenType.GT: exp.GT, 297 TokenType.GTE: exp.GTE, 298 TokenType.LT: exp.LT, 299 TokenType.LTE: exp.LTE, 300 } 301 302 BITWISE = { 303 TokenType.AMP: exp.BitwiseAnd, 304 TokenType.CARET: exp.BitwiseXor, 305 TokenType.PIPE: exp.BitwiseOr, 306 TokenType.DPIPE: exp.DPipe, 307 } 308 309 TERM = { 310 TokenType.DASH: exp.Sub, 311 TokenType.PLUS: exp.Add, 312 TokenType.MOD: exp.Mod, 313 TokenType.COLLATE: exp.Collate, 314 } 315 316 FACTOR = { 317 TokenType.DIV: exp.IntDiv, 318 TokenType.LR_ARROW: exp.Distance, 319 TokenType.SLASH: exp.Div, 320 TokenType.STAR: exp.Mul, 321 } 322 323 TIMESTAMPS = { 
324 TokenType.TIME, 325 TokenType.TIMESTAMP, 326 TokenType.TIMESTAMPTZ, 327 TokenType.TIMESTAMPLTZ, 328 } 329 330 SET_OPERATIONS = { 331 TokenType.UNION, 332 TokenType.INTERSECT, 333 TokenType.EXCEPT, 334 } 335 336 JOIN_SIDES = { 337 TokenType.LEFT, 338 TokenType.RIGHT, 339 TokenType.FULL, 340 } 341 342 JOIN_KINDS = { 343 TokenType.INNER, 344 TokenType.OUTER, 345 TokenType.CROSS, 346 TokenType.SEMI, 347 TokenType.ANTI, 348 } 349 350 LAMBDAS = { 351 TokenType.ARROW: lambda self, expressions: self.expression( 352 exp.Lambda, 353 this=self._parse_conjunction().transform( 354 self._replace_lambda, {node.name for node in expressions} 355 ), 356 expressions=expressions, 357 ), 358 TokenType.FARROW: lambda self, expressions: self.expression( 359 exp.Kwarg, 360 this=exp.Var(this=expressions[0].name), 361 expression=self._parse_conjunction(), 362 ), 363 } 364 365 COLUMN_OPERATORS = { 366 TokenType.DOT: None, 367 TokenType.DCOLON: lambda self, this, to: self.expression( 368 exp.Cast, 369 this=this, 370 to=to, 371 ), 372 TokenType.ARROW: lambda self, this, path: self.expression( 373 exp.JSONExtract, 374 this=this, 375 expression=path, 376 ), 377 TokenType.DARROW: lambda self, this, path: self.expression( 378 exp.JSONExtractScalar, 379 this=this, 380 expression=path, 381 ), 382 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 383 exp.JSONBExtract, 384 this=this, 385 expression=path, 386 ), 387 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 388 exp.JSONBExtractScalar, 389 this=this, 390 expression=path, 391 ), 392 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 393 exp.JSONBContains, 394 this=this, 395 expression=key, 396 ), 397 } 398 399 EXPRESSION_PARSERS = { 400 exp.Column: lambda self: self._parse_column(), 401 exp.DataType: lambda self: self._parse_types(), 402 exp.From: lambda self: self._parse_from(), 403 exp.Group: lambda self: self._parse_group(), 404 exp.Identifier: lambda self: self._parse_id_var(), 405 exp.Lateral: 
lambda self: self._parse_lateral(), 406 exp.Join: lambda self: self._parse_join(), 407 exp.Order: lambda self: self._parse_order(), 408 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 409 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 410 exp.Lambda: lambda self: self._parse_lambda(), 411 exp.Limit: lambda self: self._parse_limit(), 412 exp.Offset: lambda self: self._parse_offset(), 413 exp.TableAlias: lambda self: self._parse_table_alias(), 414 exp.Table: lambda self: self._parse_table(), 415 exp.Condition: lambda self: self._parse_conjunction(), 416 exp.Expression: lambda self: self._parse_statement(), 417 exp.Properties: lambda self: self._parse_properties(), 418 exp.Where: lambda self: self._parse_where(), 419 exp.Ordered: lambda self: self._parse_ordered(), 420 exp.Having: lambda self: self._parse_having(), 421 exp.With: lambda self: self._parse_with(), 422 exp.Window: lambda self: self._parse_named_window(), 423 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 424 } 425 426 STATEMENT_PARSERS = { 427 TokenType.ALTER: lambda self: self._parse_alter(), 428 TokenType.BEGIN: lambda self: self._parse_transaction(), 429 TokenType.CACHE: lambda self: self._parse_cache(), 430 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 431 TokenType.CREATE: lambda self: self._parse_create(), 432 TokenType.DELETE: lambda self: self._parse_delete(), 433 TokenType.DESC: lambda self: self._parse_describe(), 434 TokenType.DESCRIBE: lambda self: self._parse_describe(), 435 TokenType.DROP: lambda self: self._parse_drop(), 436 TokenType.END: lambda self: self._parse_commit_or_rollback(), 437 TokenType.INSERT: lambda self: self._parse_insert(), 438 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 439 TokenType.MERGE: lambda self: self._parse_merge(), 440 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 441 TokenType.UNCACHE: lambda self: self._parse_uncache(), 442 TokenType.UPDATE: 
lambda self: self._parse_update(), 443 TokenType.USE: lambda self: self.expression( 444 exp.Use, 445 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 446 and exp.Var(this=self._prev.text), 447 this=self._parse_table(schema=False), 448 ), 449 } 450 451 UNARY_PARSERS = { 452 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 453 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 454 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 455 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 456 } 457 458 PRIMARY_PARSERS = { 459 TokenType.STRING: lambda self, token: self.expression( 460 exp.Literal, this=token.text, is_string=True 461 ), 462 TokenType.NUMBER: lambda self, token: self.expression( 463 exp.Literal, this=token.text, is_string=False 464 ), 465 TokenType.STAR: lambda self, _: self.expression( 466 exp.Star, 467 **{"except": self._parse_except(), "replace": self._parse_replace()}, 468 ), 469 TokenType.NULL: lambda self, _: self.expression(exp.Null), 470 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 471 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 472 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 473 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 474 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 475 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 476 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 477 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 478 } 479 480 PLACEHOLDER_PARSERS = { 481 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 482 TokenType.PARAMETER: lambda self: self._parse_parameter(), 483 TokenType.COLON: lambda self: 
self.expression(exp.Placeholder, this=self._prev.text) 484 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 485 else None, 486 } 487 488 RANGE_PARSERS = { 489 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 490 TokenType.GLOB: lambda self, this: self._parse_escape( 491 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 492 ), 493 TokenType.IN: lambda self, this: self._parse_in(this), 494 TokenType.IS: lambda self, this: self._parse_is(this), 495 TokenType.LIKE: lambda self, this: self._parse_escape( 496 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) 497 ), 498 TokenType.ILIKE: lambda self, this: self._parse_escape( 499 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) 500 ), 501 TokenType.IRLIKE: lambda self, this: self.expression( 502 exp.RegexpILike, this=this, expression=self._parse_bitwise() 503 ), 504 TokenType.RLIKE: lambda self, this: self.expression( 505 exp.RegexpLike, this=this, expression=self._parse_bitwise() 506 ), 507 TokenType.SIMILAR_TO: lambda self, this: self.expression( 508 exp.SimilarTo, this=this, expression=self._parse_bitwise() 509 ), 510 } 511 512 PROPERTY_PARSERS = { 513 "AFTER": lambda self: self._parse_afterjournal( 514 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 515 ), 516 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 517 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 518 "BEFORE": lambda self: self._parse_journal( 519 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 520 ), 521 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 522 "CHARACTER SET": lambda self: self._parse_character_set(), 523 "CHECKSUM": lambda self: self._parse_checksum(), 524 "CLUSTER BY": lambda self: self.expression( 525 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 526 ), 527 "COLLATE": lambda self: 
self._parse_property_assignment(exp.CollateProperty), 528 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 529 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 530 default=self._prev.text.upper() == "DEFAULT" 531 ), 532 "DEFINER": lambda self: self._parse_definer(), 533 "DETERMINISTIC": lambda self: self.expression( 534 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 535 ), 536 "DISTKEY": lambda self: self._parse_distkey(), 537 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 538 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 539 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 540 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 541 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 542 "FREESPACE": lambda self: self._parse_freespace(), 543 "GLOBAL": lambda self: self._parse_temporary(global_=True), 544 "IMMUTABLE": lambda self: self.expression( 545 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 546 ), 547 "JOURNAL": lambda self: self._parse_journal( 548 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 549 ), 550 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 551 "LIKE": lambda self: self._parse_create_like(), 552 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 553 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 554 "LOCK": lambda self: self._parse_locking(), 555 "LOCKING": lambda self: self._parse_locking(), 556 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 557 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 558 "MAX": lambda self: self._parse_datablocksize(), 559 "MAXIMUM": lambda self: self._parse_datablocksize(), 560 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 
561 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 562 ), 563 "MIN": lambda self: self._parse_datablocksize(), 564 "MINIMUM": lambda self: self._parse_datablocksize(), 565 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 566 "NO": lambda self: self._parse_noprimaryindex(), 567 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 568 "ON": lambda self: self._parse_oncommit(), 569 "PARTITION BY": lambda self: self._parse_partitioned_by(), 570 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 571 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 572 "RETURNS": lambda self: self._parse_returns(), 573 "ROW": lambda self: self._parse_row(), 574 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 575 "SORTKEY": lambda self: self._parse_sortkey(), 576 "STABLE": lambda self: self.expression( 577 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 578 ), 579 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 580 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 581 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 582 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 583 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 584 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 585 "VOLATILE": lambda self: self.expression( 586 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 587 ), 588 "WITH": lambda self: self._parse_with_property(), 589 } 590 591 CONSTRAINT_PARSERS = { 592 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 593 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 594 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 595 "CHARACTER SET": lambda self: self.expression( 596 exp.CharacterSetColumnConstraint, 
this=self._parse_var_or_string() 597 ), 598 "CHECK": lambda self: self.expression( 599 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 600 ), 601 "COLLATE": lambda self: self.expression( 602 exp.CollateColumnConstraint, this=self._parse_var() 603 ), 604 "COMMENT": lambda self: self.expression( 605 exp.CommentColumnConstraint, this=self._parse_string() 606 ), 607 "COMPRESS": lambda self: self._parse_compress(), 608 "DEFAULT": lambda self: self.expression( 609 exp.DefaultColumnConstraint, this=self._parse_bitwise() 610 ), 611 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 612 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 613 "FORMAT": lambda self: self.expression( 614 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 615 ), 616 "GENERATED": lambda self: self._parse_generated_as_identity(), 617 "IDENTITY": lambda self: self._parse_auto_increment(), 618 "INLINE": lambda self: self._parse_inline(), 619 "LIKE": lambda self: self._parse_create_like(), 620 "NOT": lambda self: self._parse_not_constraint(), 621 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 622 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 623 "PRIMARY KEY": lambda self: self._parse_primary_key(), 624 "TITLE": lambda self: self.expression( 625 exp.TitleColumnConstraint, this=self._parse_var_or_string() 626 ), 627 "UNIQUE": lambda self: self._parse_unique(), 628 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 629 } 630 631 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 632 633 NO_PAREN_FUNCTION_PARSERS = { 634 TokenType.CASE: lambda self: self._parse_case(), 635 TokenType.IF: lambda self: self._parse_if(), 636 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 637 } 638 639 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 640 "CONVERT": 
lambda self: self._parse_convert(self.STRICT_CAST), 641 "TRY_CONVERT": lambda self: self._parse_convert(False), 642 "EXTRACT": lambda self: self._parse_extract(), 643 "POSITION": lambda self: self._parse_position(), 644 "SUBSTRING": lambda self: self._parse_substring(), 645 "TRIM": lambda self: self._parse_trim(), 646 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 647 "TRY_CAST": lambda self: self._parse_cast(False), 648 "STRING_AGG": lambda self: self._parse_string_agg(), 649 } 650 651 QUERY_MODIFIER_PARSERS = { 652 "match": lambda self: self._parse_match_recognize(), 653 "where": lambda self: self._parse_where(), 654 "group": lambda self: self._parse_group(), 655 "having": lambda self: self._parse_having(), 656 "qualify": lambda self: self._parse_qualify(), 657 "windows": lambda self: self._parse_window_clause(), 658 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 659 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 660 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 661 "order": lambda self: self._parse_order(), 662 "limit": lambda self: self._parse_limit(), 663 "offset": lambda self: self._parse_offset(), 664 "lock": lambda self: self._parse_lock(), 665 } 666 667 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 668 SET_PARSERS: t.Dict[str, t.Callable] = {} 669 670 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 671 672 CREATABLES = { 673 TokenType.COLUMN, 674 TokenType.FUNCTION, 675 TokenType.INDEX, 676 TokenType.PROCEDURE, 677 TokenType.SCHEMA, 678 TokenType.TABLE, 679 TokenType.VIEW, 680 } 681 682 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 683 684 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 685 686 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 687 688 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 689 690 STRICT_CAST = True 691 692 __slots__ = ( 693 "error_level", 
694 "error_message_context", 695 "sql", 696 "errors", 697 "index_offset", 698 "unnest_column_only", 699 "alias_post_tablesample", 700 "max_errors", 701 "null_ordering", 702 "_tokens", 703 "_index", 704 "_curr", 705 "_next", 706 "_prev", 707 "_prev_comments", 708 "_show_trie", 709 "_set_trie", 710 ) 711 712 def __init__( 713 self, 714 error_level: t.Optional[ErrorLevel] = None, 715 error_message_context: int = 100, 716 index_offset: int = 0, 717 unnest_column_only: bool = False, 718 alias_post_tablesample: bool = False, 719 max_errors: int = 3, 720 null_ordering: t.Optional[str] = None, 721 ): 722 self.error_level = error_level or ErrorLevel.IMMEDIATE 723 self.error_message_context = error_message_context 724 self.index_offset = index_offset 725 self.unnest_column_only = unnest_column_only 726 self.alias_post_tablesample = alias_post_tablesample 727 self.max_errors = max_errors 728 self.null_ordering = null_ordering 729 self.reset() 730 731 def reset(self): 732 self.sql = "" 733 self.errors = [] 734 self._tokens = [] 735 self._index = 0 736 self._curr = None 737 self._next = None 738 self._prev = None 739 self._prev_comments = None 740 741 def parse( 742 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 743 ) -> t.List[t.Optional[exp.Expression]]: 744 """ 745 Parses a list of tokens and returns a list of syntax trees, one tree 746 per parsed SQL statement. 747 748 Args: 749 raw_tokens: the list of tokens. 750 sql: the original SQL string, used to produce helpful debug messages. 751 752 Returns: 753 The list of syntax trees. 754 """ 755 return self._parse( 756 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 757 ) 758 759 def parse_into( 760 self, 761 expression_types: exp.IntoType, 762 raw_tokens: t.List[Token], 763 sql: t.Optional[str] = None, 764 ) -> t.List[t.Optional[exp.Expression]]: 765 """ 766 Parses a list of tokens into a given Expression type. 
If a collection of Expression 767 types is given instead, this method will try to parse the token list into each one 768 of them, stopping at the first for which the parsing succeeds. 769 770 Args: 771 expression_types: the expression type(s) to try and parse the token list into. 772 raw_tokens: the list of tokens. 773 sql: the original SQL string, used to produce helpful debug messages. 774 775 Returns: 776 The target Expression. 777 """ 778 errors = [] 779 for expression_type in ensure_collection(expression_types): 780 parser = self.EXPRESSION_PARSERS.get(expression_type) 781 if not parser: 782 raise TypeError(f"No parser registered for {expression_type}") 783 try: 784 return self._parse(parser, raw_tokens, sql) 785 except ParseError as e: 786 e.errors[0]["into_expression"] = expression_type 787 errors.append(e) 788 raise ParseError( 789 f"Failed to parse into {expression_types}", 790 errors=merge_errors(errors), 791 ) from errors[-1] 792 793 def _parse( 794 self, 795 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 796 raw_tokens: t.List[Token], 797 sql: t.Optional[str] = None, 798 ) -> t.List[t.Optional[exp.Expression]]: 799 self.reset() 800 self.sql = sql or "" 801 total = len(raw_tokens) 802 chunks: t.List[t.List[Token]] = [[]] 803 804 for i, token in enumerate(raw_tokens): 805 if token.token_type == TokenType.SEMICOLON: 806 if i < total - 1: 807 chunks.append([]) 808 else: 809 chunks[-1].append(token) 810 811 expressions = [] 812 813 for tokens in chunks: 814 self._index = -1 815 self._tokens = tokens 816 self._advance() 817 818 expressions.append(parse_method(self)) 819 820 if self._index < len(self._tokens): 821 self.raise_error("Invalid expression / Unexpected token") 822 823 self.check_errors() 824 825 return expressions 826 827 def check_errors(self) -> None: 828 """ 829 Logs or raises any found errors, depending on the chosen error level setting. 
830 """ 831 if self.error_level == ErrorLevel.WARN: 832 for error in self.errors: 833 logger.error(str(error)) 834 elif self.error_level == ErrorLevel.RAISE and self.errors: 835 raise ParseError( 836 concat_messages(self.errors, self.max_errors), 837 errors=merge_errors(self.errors), 838 ) 839 840 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 841 """ 842 Appends an error in the list of recorded errors or raises it, depending on the chosen 843 error level setting. 844 """ 845 token = token or self._curr or self._prev or Token.string("") 846 start = self._find_token(token) 847 end = start + len(token.text) 848 start_context = self.sql[max(start - self.error_message_context, 0) : start] 849 highlight = self.sql[start:end] 850 end_context = self.sql[end : end + self.error_message_context] 851 852 error = ParseError.new( 853 f"{message}. Line {token.line}, Col: {token.col}.\n" 854 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 855 description=message, 856 line=token.line, 857 col=token.col, 858 start_context=start_context, 859 highlight=highlight, 860 end_context=end_context, 861 ) 862 863 if self.error_level == ErrorLevel.IMMEDIATE: 864 raise error 865 866 self.errors.append(error) 867 868 def expression( 869 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 870 ) -> exp.Expression: 871 """ 872 Creates a new, validated Expression. 873 874 Args: 875 exp_class: the expression class to instantiate. 876 comments: an optional list of comments to attach to the expression. 877 kwargs: the arguments to set for the expression along with their respective values. 878 879 Returns: 880 The target expression. 
881 """ 882 instance = exp_class(**kwargs) 883 if self._prev_comments: 884 instance.comments = self._prev_comments 885 self._prev_comments = None 886 if comments: 887 instance.comments = comments 888 self.validate_expression(instance) 889 return instance 890 891 def validate_expression( 892 self, expression: exp.Expression, args: t.Optional[t.List] = None 893 ) -> None: 894 """ 895 Validates an already instantiated expression, making sure that all its mandatory arguments 896 are set. 897 898 Args: 899 expression: the expression to validate. 900 args: an optional list of items that was used to instantiate the expression, if it's a Func. 901 """ 902 if self.error_level == ErrorLevel.IGNORE: 903 return 904 905 for error_message in expression.error_messages(args): 906 self.raise_error(error_message) 907 908 def _find_sql(self, start: Token, end: Token) -> str: 909 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 910 911 def _find_token(self, token: Token) -> int: 912 line = 1 913 col = 1 914 index = 0 915 916 while line < token.line or col < token.col: 917 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 918 line += 1 919 col = 1 920 else: 921 col += 1 922 index += 1 923 924 return index 925 926 def _advance(self, times: int = 1) -> None: 927 self._index += times 928 self._curr = seq_get(self._tokens, self._index) 929 self._next = seq_get(self._tokens, self._index + 1) 930 if self._index > 0: 931 self._prev = self._tokens[self._index - 1] 932 self._prev_comments = self._prev.comments 933 else: 934 self._prev = None 935 self._prev_comments = None 936 937 def _retreat(self, index: int) -> None: 938 self._advance(index - self._index) 939 940 def _parse_command(self) -> exp.Expression: 941 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 942 943 def _parse_statement(self) -> t.Optional[exp.Expression]: 944 if self._curr is None: 945 return None 946 947 if 
self._match_set(self.STATEMENT_PARSERS): 948 return self.STATEMENT_PARSERS[self._prev.token_type](self) 949 950 if self._match_set(Tokenizer.COMMANDS): 951 return self._parse_command() 952 953 expression = self._parse_expression() 954 expression = self._parse_set_operations(expression) if expression else self._parse_select() 955 956 self._parse_query_modifiers(expression) 957 return expression 958 959 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 960 start = self._prev 961 temporary = self._match(TokenType.TEMPORARY) 962 materialized = self._match(TokenType.MATERIALIZED) 963 kind = self._match_set(self.CREATABLES) and self._prev.text 964 if not kind: 965 if default_kind: 966 kind = default_kind 967 else: 968 return self._parse_as_command(start) 969 970 return self.expression( 971 exp.Drop, 972 exists=self._parse_exists(), 973 this=self._parse_table(schema=True), 974 kind=kind, 975 temporary=temporary, 976 materialized=materialized, 977 cascade=self._match(TokenType.CASCADE), 978 ) 979 980 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 981 return ( 982 self._match(TokenType.IF) 983 and (not not_ or self._match(TokenType.NOT)) 984 and self._match(TokenType.EXISTS) 985 ) 986 987 def _parse_create(self) -> t.Optional[exp.Expression]: 988 start = self._prev 989 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 990 TokenType.OR, TokenType.REPLACE 991 ) 992 unique = self._match(TokenType.UNIQUE) 993 994 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 995 self._match(TokenType.TABLE) 996 997 properties = None 998 create_token = self._match_set(self.CREATABLES) and self._prev 999 1000 if not create_token: 1001 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1002 create_token = self._match_set(self.CREATABLES) and self._prev 1003 1004 if not properties or not create_token: 1005 return self._parse_as_command(start) 1006 1007 exists = 
self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and
temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            replace=replace,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Teradata-style properties that appear BEFORE the table name, comma separated.
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): assumes _curr is not None here — TODO confirm callers guarantee a token.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parse a single post-name property, trying the registered parsers first.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic <key> = <value> assignment (key is a var or a string literal).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # <key> [= | AS] <var | number | identifier> -> exp_class(this=<value>)
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collect consecutive properties into a single exp.Properties node, or None if there are none.
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parser may yield one property or a list of them.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # [NO] FALLBACK [PROTECTION]
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) property list, WITH JOURNAL, WITH [NO] DATA, or WITH ... ISOLATED LOADING.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        # WITH JOURNAL TABLE = <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        # [NO] LOG
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # [NO | DUAL] [BEFORE] JOURNAL
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # [NO | DUAL] [NOT LOCAL] AFTER JOURNAL
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = {ON | OFF | DEFAULT}
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if
self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # MERGEBLOCKRATIO [= <number> [PERCENT]] | [NO | DEFAULT] MERGEBLOCKRATIO
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # {DEFAULT | MINIMUM | MAXIMUM} DATABLOCKSIZE | DATABLOCKSIZE = <n> [BYTES | KBYTES | KILOBYTES]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never =
self._match_text_seq("NEVER") 1300 default = self._match_text_seq("DEFAULT") 1301 autotemp = None 1302 if self._match_text_seq("AUTOTEMP"): 1303 autotemp = self._parse_schema() 1304 1305 return self.expression( 1306 exp.BlockCompressionProperty, 1307 always=always, 1308 manual=manual, 1309 never=never, 1310 default=default, 1311 autotemp=autotemp, 1312 ) 1313 1314 def _parse_withisolatedloading(self) -> exp.Expression: 1315 no = self._match_text_seq("NO") 1316 concurrent = self._match_text_seq("CONCURRENT") 1317 self._match_text_seq("ISOLATED", "LOADING") 1318 for_all = self._match_text_seq("FOR", "ALL") 1319 for_insert = self._match_text_seq("FOR", "INSERT") 1320 for_none = self._match_text_seq("FOR", "NONE") 1321 return self.expression( 1322 exp.IsolatedLoadingProperty, 1323 no=no, 1324 concurrent=concurrent, 1325 for_all=for_all, 1326 for_insert=for_insert, 1327 for_none=for_none, 1328 ) 1329 1330 def _parse_locking(self) -> exp.Expression: 1331 if self._match(TokenType.TABLE): 1332 kind = "TABLE" 1333 elif self._match(TokenType.VIEW): 1334 kind = "VIEW" 1335 elif self._match(TokenType.ROW): 1336 kind = "ROW" 1337 elif self._match_text_seq("DATABASE"): 1338 kind = "DATABASE" 1339 else: 1340 kind = None 1341 1342 if kind in ("DATABASE", "TABLE", "VIEW"): 1343 this = self._parse_table_parts() 1344 else: 1345 this = None 1346 1347 if self._match(TokenType.FOR): 1348 for_or_in = "FOR" 1349 elif self._match(TokenType.IN): 1350 for_or_in = "IN" 1351 else: 1352 for_or_in = None 1353 1354 if self._match_text_seq("ACCESS"): 1355 lock_type = "ACCESS" 1356 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1357 lock_type = "EXCLUSIVE" 1358 elif self._match_text_seq("SHARE"): 1359 lock_type = "SHARE" 1360 elif self._match_text_seq("READ"): 1361 lock_type = "READ" 1362 elif self._match_text_seq("WRITE"): 1363 lock_type = "WRITE" 1364 elif self._match_text_seq("CHECKSUM"): 1365 lock_type = "CHECKSUM" 1366 else: 1367 lock_type = None 1368 1369 override = 
self._match_text_seq("OVERRIDE") 1370 1371 return self.expression( 1372 exp.LockingProperty, 1373 this=this, 1374 kind=kind, 1375 for_or_in=for_or_in, 1376 lock_type=lock_type, 1377 override=override, 1378 ) 1379 1380 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1381 if self._match(TokenType.PARTITION_BY): 1382 return self._parse_csv(self._parse_conjunction) 1383 return [] 1384 1385 def _parse_partitioned_by(self) -> exp.Expression: 1386 self._match(TokenType.EQ) 1387 return self.expression( 1388 exp.PartitionedByProperty, 1389 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1390 ) 1391 1392 def _parse_withdata(self, no=False) -> exp.Expression: 1393 if self._match_text_seq("AND", "STATISTICS"): 1394 statistics = True 1395 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1396 statistics = False 1397 else: 1398 statistics = None 1399 1400 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1401 1402 def _parse_noprimaryindex(self) -> exp.Expression: 1403 self._match_text_seq("PRIMARY", "INDEX") 1404 return exp.NoPrimaryIndexProperty() 1405 1406 def _parse_oncommit(self) -> exp.Expression: 1407 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1408 return exp.OnCommitProperty() 1409 1410 def _parse_distkey(self) -> exp.Expression: 1411 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1412 1413 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1414 table = self._parse_table(schema=True) 1415 options = [] 1416 while self._match_texts(("INCLUDING", "EXCLUDING")): 1417 this = self._prev.text.upper() 1418 id_var = self._parse_id_var() 1419 1420 if not id_var: 1421 return None 1422 1423 options.append( 1424 self.expression( 1425 exp.Property, 1426 this=this, 1427 value=exp.Var(this=id_var.this.upper()), 1428 ) 1429 ) 1430 return self.expression(exp.LikeProperty, this=table, expressions=options) 1431 1432 def _parse_sortkey(self, compound: bool = 
False) -> exp.Expression:
        # [COMPOUND] SORTKEY (<id>, ...)
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        # [DEFAULT] CHARACTER SET [=] <name>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<schema> | < ... >]
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # BigQuery-style RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        # DESCRIBE [<kind>] <table>
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # INSERT [OVERWRITE] [LOCAL] {DIRECTORY <path> | [OR <alt>] [INTO] [TABLE] <table>} ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        # ROW FORMAT ... ("ROW" was consumed by the caller).
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive: ROW FORMAT {SERDE <string> | DELIMITED [FIELDS ...] [COLLECTION ...] ...}
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # NOTE(review): ESCAPED BY treated as a sub-clause of FIELDS — confirm against Hive grammar.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        # Hive: LOAD DATA [LOCAL] INPATH <string> [OVERWRITE] INTO TABLE <table> ...
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        # DELETE [FROM] <table> [USING ...] [WHERE ...]
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
        )

    def _parse_update(self) -> exp.Expression:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        # Spark: UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # Spark: CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        # PARTITION (<expr>, ...)
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition,
expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # One VALUES row: either a parenthesized tuple or a single bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        # Core entry point for SELECT / WITH / (subquery) / VALUES.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested)
and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        # WITH [RECURSIVE] <cte> [, <cte>]...
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        # <alias> [(<columns>)] AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # [AS] <alias> [(<column aliases>)]
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if
self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, this wasn't a column list — back out.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        # Wrap a parsed query in a Subquery node with optional pivots/alias.
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        # Attach laterals/joins/comma-joins and trailing clauses (WHERE, GROUP BY, ...) to `this`.
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma join: extend the FROM clause's expression list.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        # Oracle-style /*+ ... */ hint.
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        # SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into,
            this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        # FROM <table> [, <table>]...
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        # MATCH_RECOGNIZE ( [PARTITION BY ...] [ORDER BY ...] [MEASURES ...] ... )
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match clause is kept as a raw Var since it's pure keyword soup.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is regex-like; capture its raw SQL text verbatim by
            # scanning tokens until the parens balance.
            paren = 1
            start =
self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        # LATERAL [VIEW] ... or SQL Server CROSS/OUTER APPLY.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # Hive LATERAL VIEW: `<table> AS <col1>, <col2>...`
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY is represented as a join (OUTER APPLY == LEFT side).
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # (NATURAL?, side (LEFT/RIGHT/FULL)?, kind (INNER/OUTER/CROSS/...)?)
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        # <index name> ON [TABLE] <table> (<expr>)
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        # Trailing index spec inside CREATE TABLE: [UNIQUE] [PRIMARY] [AMP] INDEX <name> (cols)
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        # [<catalog>.][<db>.]<table>, with extra dots folded into Dot expressions.
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # A FROM-clause item: lateral, unnest, VALUES, subquery, or a plain table reference.
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda:
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table source with optional ORDINALITY, alias and OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In unnest_column_only dialects the "table" alias actually names the column.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse VALUES (...), optionally parenthesized as a derived table, with alias."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())

    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
        """Parse a TABLESAMPLE clause (bucket, percent, rows or size form, plus SEED)."""
        if not self._match(TokenType.TABLE_SAMPLE):
            return None

        method = self._parse_var()
        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        self._match_l_paren()

        if self._match(TokenType.BUCKET):
2133 bucket_numerator = self._parse_number() 2134 self._match(TokenType.OUT_OF) 2135 bucket_denominator = bucket_denominator = self._parse_number() 2136 self._match(TokenType.ON) 2137 bucket_field = self._parse_field() 2138 else: 2139 num = self._parse_number() 2140 2141 if self._match(TokenType.PERCENT): 2142 percent = num 2143 elif self._match(TokenType.ROWS): 2144 rows = num 2145 else: 2146 size = num 2147 2148 self._match_r_paren() 2149 2150 if self._match(TokenType.SEED): 2151 seed = self._parse_wrapped(self._parse_number) 2152 2153 return self.expression( 2154 exp.TableSample, 2155 method=method, 2156 bucket_numerator=bucket_numerator, 2157 bucket_denominator=bucket_denominator, 2158 bucket_field=bucket_field, 2159 percent=percent, 2160 rows=rows, 2161 size=size, 2162 seed=seed, 2163 ) 2164 2165 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2166 return list(iter(self._parse_pivot, None)) 2167 2168 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2169 index = self._index 2170 2171 if self._match(TokenType.PIVOT): 2172 unpivot = False 2173 elif self._match(TokenType.UNPIVOT): 2174 unpivot = True 2175 else: 2176 return None 2177 2178 expressions = [] 2179 field = None 2180 2181 if not self._match(TokenType.L_PAREN): 2182 self._retreat(index) 2183 return None 2184 2185 if unpivot: 2186 expressions = self._parse_csv(self._parse_column) 2187 else: 2188 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2189 2190 if not self._match(TokenType.FOR): 2191 self.raise_error("Expecting FOR") 2192 2193 value = self._parse_column() 2194 2195 if not self._match(TokenType.IN): 2196 self.raise_error("Expecting IN") 2197 2198 field = self._parse_in(value) 2199 2200 self._match_r_paren() 2201 2202 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2203 2204 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2205 pivot.set("alias", self._parse_table_alias()) 2206 
        return pivot

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; preserves comments attached to the WHERE token."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP and CUBE."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop: the grouping elements can appear in any order and repeat.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE use a bare flag; ROLLUP(...) / CUBE(...) take columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( set, set, ... )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause (window-function filter)."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when no ORDER BY is present."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: expression, ASC/DESC, NULLS FIRST/LAST.

        When nulls ordering is not explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so generated SQL is deterministic.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT n (or TOP n when `top`), or a FETCH FIRST/NEXT clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; COMMA supports the `LIMIT x, y` form."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, <=, >, >=."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE (via RANGE_PARSERS), IS, NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse IS [NOT] DISTINCT FROM / IS [NOT] NULL|TRUE|FALSE."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS DISTINCT FROM maps to null-safe (in)equality nodes.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: UNNEST(...), (list/subquery), or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery becomes IN (SELECT ...); otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse BETWEEN low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE 'char' suffix (for LIKE patterns)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or fall through to a typed/timezone expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals or a `TYPE 'literal'` style cast, else a column."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # e.g. DATE '2020-01-01' -> CAST('2020-01-01' AS DATE)
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare type name with nothing following: treat it as an identifier.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), sized and
        timezone-qualified forms.

        When `check_func` is set, a type name followed by something that is not a
        string literal is rejected (it is probably a function call, not a cast).
        """
        index = self._index

        # Teradata allows a SYSUDTLIB. prefix on UDT names.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # Could still turn out to be a function call rather than a type.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix: INT[] / INT[][] ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                # Each extra [] wraps the type in one more ARRAY level.
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ means indexing, not a type — rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket generics: ARRAY<INT>, STRUCT<a: INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Resolve WITH/WITHOUT TIME ZONE qualifiers to a concrete timestamp type.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal -> likely a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value
        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: either a bare type or `name [:] type`."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed/cast) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
2657 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2658 this = self._replace_columns_with_dots(this) 2659 2660 if op: 2661 this = op(self, this, field) 2662 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2663 this = self.expression( 2664 exp.Column, 2665 this=field, 2666 table=this.this, 2667 db=this.args.get("table"), 2668 catalog=this.args.get("db"), 2669 ) 2670 else: 2671 this = self.expression(exp.Dot, this=this, expression=field) 2672 this = self._parse_bracket(this) 2673 2674 return this 2675 2676 def _parse_primary(self) -> t.Optional[exp.Expression]: 2677 if self._match_set(self.PRIMARY_PARSERS): 2678 token_type = self._prev.token_type 2679 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2680 2681 if token_type == TokenType.STRING: 2682 expressions = [primary] 2683 while self._match(TokenType.STRING): 2684 expressions.append(exp.Literal.string(self._prev.text)) 2685 if len(expressions) > 1: 2686 return self.expression(exp.Concat, expressions=expressions) 2687 return primary 2688 2689 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2690 return exp.Literal.number(f"0.{self._prev.text}") 2691 2692 if self._match(TokenType.L_PAREN): 2693 comments = self._prev_comments 2694 query = self._parse_select() 2695 2696 if query: 2697 expressions = [query] 2698 else: 2699 expressions = self._parse_csv( 2700 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2701 ) 2702 2703 this = seq_get(expressions, 0) 2704 self._parse_query_modifiers(this) 2705 self._match_r_paren() 2706 2707 if isinstance(this, exp.Subqueryable): 2708 this = self._parse_set_operations( 2709 self._parse_subquery(this=this, parse_alias=False) 2710 ) 2711 elif len(expressions) > 1: 2712 this = self.expression(exp.Tuple, expressions=expressions) 2713 else: 2714 this = self.expression(exp.Paren, this=this) 2715 2716 if this and comments: 2717 this.comments = comments 2718 2719 return this 
        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers.

        `functions` overrides the default FUNCTIONS registry when given.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens: only recognized for no-paren builtins like CURRENT_DATE.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # skip the name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS / ANY / ALL over a subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an Anonymous call node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function/UDF definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. MySQL's _utf8'...') before a literal."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap an N'...' national string literal."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        index = self._index

        # Try to read a lambda head: either (a, b) or a single identifier.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list attached to `this` (e.g. CREATE TABLE)."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type followed by any column constraints."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Plain identifier with no type or constraints: not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # AUTOINCREMENT(start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with one value or a wrapped list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            # `this=False` marks BY DEFAULT; `this=True` marks ALWAYS.
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones fall through to the
        schema-level unnamed-constraint set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the matched constraint keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE as a bare column constraint or UNIQUE (cols) as a table constraint."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ... actions, DEFERRABLE, etc.)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] plus any key-constraint options."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) REFERENCES ... with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")
kind = self._prev.text.lower() 3064 3065 if self._match(TokenType.NO_ACTION): 3066 action = "NO ACTION" 3067 elif self._match(TokenType.SET): 3068 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3069 action = "SET " + self._prev.text.upper() 3070 else: 3071 self._advance() 3072 action = self._prev.text.upper() 3073 3074 options[kind] = action 3075 3076 return self.expression( 3077 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3078 ) 3079 3080 def _parse_primary_key(self) -> exp.Expression: 3081 desc = ( 3082 self._match_set((TokenType.ASC, TokenType.DESC)) 3083 and self._prev.token_type == TokenType.DESC 3084 ) 3085 3086 if not self._match(TokenType.L_PAREN, advance=False): 3087 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3088 3089 expressions = self._parse_wrapped_id_vars() 3090 options = self._parse_key_constraint_options() 3091 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3092 3093 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3094 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3095 return this 3096 3097 bracket_kind = self._prev.token_type 3098 expressions: t.List[t.Optional[exp.Expression]] 3099 3100 if self._match(TokenType.COLON): 3101 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3102 else: 3103 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3104 3105 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3106 if bracket_kind == TokenType.L_BRACE: 3107 this = self.expression(exp.Struct, expressions=expressions) 3108 elif not this or this.name.upper() == "ARRAY": 3109 this = self.expression(exp.Array, expressions=expressions) 3110 else: 3111 expressions = apply_index_offset(expressions, -self.index_offset) 3112 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3113 3114 if not 
self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3115 self.raise_error("Expected ]") 3116 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3117 self.raise_error("Expected }") 3118 3119 this.comments = self._prev_comments 3120 return self._parse_bracket(this) 3121 3122 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3123 if self._match(TokenType.COLON): 3124 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3125 return this 3126 3127 def _parse_case(self) -> t.Optional[exp.Expression]: 3128 ifs = [] 3129 default = None 3130 3131 expression = self._parse_conjunction() 3132 3133 while self._match(TokenType.WHEN): 3134 this = self._parse_conjunction() 3135 self._match(TokenType.THEN) 3136 then = self._parse_conjunction() 3137 ifs.append(self.expression(exp.If, this=this, true=then)) 3138 3139 if self._match(TokenType.ELSE): 3140 default = self._parse_conjunction() 3141 3142 if not self._match(TokenType.END): 3143 self.raise_error("Expected END after CASE", self._prev) 3144 3145 return self._parse_window( 3146 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3147 ) 3148 3149 def _parse_if(self) -> t.Optional[exp.Expression]: 3150 if self._match(TokenType.L_PAREN): 3151 args = self._parse_csv(self._parse_conjunction) 3152 this = exp.If.from_arg_list(args) 3153 self.validate_expression(this, args) 3154 self._match_r_paren() 3155 else: 3156 condition = self._parse_conjunction() 3157 self._match(TokenType.THEN) 3158 true = self._parse_conjunction() 3159 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3160 self._match(TokenType.END) 3161 this = self.expression(exp.If, this=condition, true=true, false=false) 3162 3163 return self._parse_window(this) 3164 3165 def _parse_extract(self) -> exp.Expression: 3166 this = self._parse_function() or self._parse_var() or self._parse_type() 3167 3168 if 
self._match(TokenType.FROM): 3169 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3170 3171 if not self._match(TokenType.COMMA): 3172 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3173 3174 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3175 3176 def _parse_cast(self, strict: bool) -> exp.Expression: 3177 this = self._parse_conjunction() 3178 3179 if not self._match(TokenType.ALIAS): 3180 self.raise_error("Expected AS after CAST") 3181 3182 to = self._parse_types() 3183 3184 if not to: 3185 self.raise_error("Expected TYPE after CAST") 3186 elif to.this == exp.DataType.Type.CHAR: 3187 if self._match(TokenType.CHARACTER_SET): 3188 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3189 3190 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3191 3192 def _parse_string_agg(self) -> exp.Expression: 3193 expression: t.Optional[exp.Expression] 3194 3195 if self._match(TokenType.DISTINCT): 3196 args = self._parse_csv(self._parse_conjunction) 3197 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3198 else: 3199 args = self._parse_csv(self._parse_conjunction) 3200 expression = seq_get(args, 0) 3201 3202 index = self._index 3203 if not self._match(TokenType.R_PAREN): 3204 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3205 order = self._parse_order(this=expression) 3206 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3207 3208 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3209 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3210 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3211 if not self._match(TokenType.WITHIN_GROUP): 3212 self._retreat(index) 3213 this = exp.GroupConcat.from_arg_list(args) 3214 self.validate_expression(this, args) 3215 return this 3216 3217 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3218 order = self._parse_order(this=expression) 3219 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3220 3221 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3222 to: t.Optional[exp.Expression] 3223 this = self._parse_column() 3224 3225 if self._match(TokenType.USING): 3226 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3227 elif self._match(TokenType.COMMA): 3228 to = self._parse_types() 3229 else: 3230 to = None 3231 3232 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3233 3234 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3235 args = self._parse_csv(self._parse_bitwise) 3236 3237 if self._match(TokenType.IN): 3238 return self.expression( 3239 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3240 ) 3241 3242 if haystack_first: 3243 haystack = seq_get(args, 0) 3244 needle = seq_get(args, 1) 3245 else: 3246 needle = seq_get(args, 0) 3247 haystack = seq_get(args, 1) 3248 3249 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3250 3251 self.validate_expression(this, args) 3252 3253 return this 3254 3255 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3256 args = self._parse_csv(self._parse_table) 3257 return exp.JoinHint(this=func_name.upper(), expressions=args) 3258 3259 def _parse_substring(self) -> exp.Expression: 3260 # Postgres supports the form: substring(string [from int] [for int]) 3261 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3262 3263 args = self._parse_csv(self._parse_bitwise) 3264 3265 if self._match(TokenType.FROM): 3266 
args.append(self._parse_bitwise()) 3267 if self._match(TokenType.FOR): 3268 args.append(self._parse_bitwise()) 3269 3270 this = exp.Substring.from_arg_list(args) 3271 self.validate_expression(this, args) 3272 3273 return this 3274 3275 def _parse_trim(self) -> exp.Expression: 3276 # https://www.w3resource.com/sql/character-functions/trim.php 3277 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3278 3279 position = None 3280 collation = None 3281 3282 if self._match_set(self.TRIM_TYPES): 3283 position = self._prev.text.upper() 3284 3285 expression = self._parse_term() 3286 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3287 this = self._parse_term() 3288 else: 3289 this = expression 3290 expression = None 3291 3292 if self._match(TokenType.COLLATE): 3293 collation = self._parse_term() 3294 3295 return self.expression( 3296 exp.Trim, 3297 this=this, 3298 position=position, 3299 expression=expression, 3300 collation=collation, 3301 ) 3302 3303 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3304 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3305 3306 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3307 return self._parse_window(self._parse_id_var(), alias=True) 3308 3309 def _parse_window( 3310 self, this: t.Optional[exp.Expression], alias: bool = False 3311 ) -> t.Optional[exp.Expression]: 3312 if self._match(TokenType.FILTER): 3313 where = self._parse_wrapped(self._parse_where) 3314 this = self.expression(exp.Filter, this=this, expression=where) 3315 3316 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3317 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3318 if self._match(TokenType.WITHIN_GROUP): 3319 order = self._parse_wrapped(self._parse_order) 3320 this = self.expression(exp.WithinGroup, this=this, expression=order) 3321 3322 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3323 # Some dialects choose to implement and some do not. 3324 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3325 3326 # There is some code above in _parse_lambda that handles 3327 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3328 3329 # The below changes handle 3330 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3331 3332 # Oracle allows both formats 3333 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3334 # and Snowflake chose to do the same for familiarity 3335 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3336 if self._match(TokenType.IGNORE_NULLS): 3337 this = self.expression(exp.IgnoreNulls, this=this) 3338 elif self._match(TokenType.RESPECT_NULLS): 3339 this = self.expression(exp.RespectNulls, this=this) 3340 3341 # bigquery select from window x AS (partition by ...) 
3342 if alias: 3343 self._match(TokenType.ALIAS) 3344 elif not self._match(TokenType.OVER): 3345 return this 3346 3347 if not self._match(TokenType.L_PAREN): 3348 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3349 3350 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3351 partition = self._parse_partition_by() 3352 order = self._parse_order() 3353 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3354 3355 if kind: 3356 self._match(TokenType.BETWEEN) 3357 start = self._parse_window_spec() 3358 self._match(TokenType.AND) 3359 end = self._parse_window_spec() 3360 3361 spec = self.expression( 3362 exp.WindowSpec, 3363 kind=kind, 3364 start=start["value"], 3365 start_side=start["side"], 3366 end=end["value"], 3367 end_side=end["side"], 3368 ) 3369 else: 3370 spec = None 3371 3372 self._match_r_paren() 3373 3374 return self.expression( 3375 exp.Window, 3376 this=this, 3377 partition_by=partition, 3378 order=order, 3379 spec=spec, 3380 alias=window_alias, 3381 ) 3382 3383 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3384 self._match(TokenType.BETWEEN) 3385 3386 return { 3387 "value": ( 3388 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3389 ) 3390 or self._parse_bitwise(), 3391 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3392 } 3393 3394 def _parse_alias( 3395 self, this: t.Optional[exp.Expression], explicit: bool = False 3396 ) -> t.Optional[exp.Expression]: 3397 any_token = self._match(TokenType.ALIAS) 3398 3399 if explicit and not any_token: 3400 return this 3401 3402 if self._match(TokenType.L_PAREN): 3403 aliases = self.expression( 3404 exp.Aliases, 3405 this=this, 3406 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3407 ) 3408 self._match_r_paren(aliases) 3409 return aliases 3410 3411 alias = self._parse_id_var(any_token) 3412 3413 if 
alias: 3414 return self.expression(exp.Alias, this=this, alias=alias) 3415 3416 return this 3417 3418 def _parse_id_var( 3419 self, 3420 any_token: bool = True, 3421 tokens: t.Optional[t.Collection[TokenType]] = None, 3422 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3423 ) -> t.Optional[exp.Expression]: 3424 identifier = self._parse_identifier() 3425 3426 if identifier: 3427 return identifier 3428 3429 prefix = "" 3430 3431 if prefix_tokens: 3432 while self._match_set(prefix_tokens): 3433 prefix += self._prev.text 3434 3435 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3436 quoted = self._prev.token_type == TokenType.STRING 3437 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3438 3439 return None 3440 3441 def _parse_string(self) -> t.Optional[exp.Expression]: 3442 if self._match(TokenType.STRING): 3443 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3444 return self._parse_placeholder() 3445 3446 def _parse_number(self) -> t.Optional[exp.Expression]: 3447 if self._match(TokenType.NUMBER): 3448 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3449 return self._parse_placeholder() 3450 3451 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3452 if self._match(TokenType.IDENTIFIER): 3453 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3454 return self._parse_placeholder() 3455 3456 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: 3457 if (any_token and self._advance_any()) or self._match(TokenType.VAR): 3458 return self.expression(exp.Var, this=self._prev.text) 3459 return self._parse_placeholder() 3460 3461 def _advance_any(self) -> t.Optional[Token]: 3462 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3463 self._advance() 3464 return self._prev 3465 return None 3466 3467 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3468 return self._parse_var() or 
self._parse_string() 3469 3470 def _parse_null(self) -> t.Optional[exp.Expression]: 3471 if self._match(TokenType.NULL): 3472 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3473 return None 3474 3475 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3476 if self._match(TokenType.TRUE): 3477 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3478 if self._match(TokenType.FALSE): 3479 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3480 return None 3481 3482 def _parse_star(self) -> t.Optional[exp.Expression]: 3483 if self._match(TokenType.STAR): 3484 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3485 return None 3486 3487 def _parse_parameter(self) -> exp.Expression: 3488 wrapped = self._match(TokenType.L_BRACE) 3489 this = self._parse_var() or self._parse_primary() 3490 self._match(TokenType.R_BRACE) 3491 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3492 3493 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3494 if self._match_set(self.PLACEHOLDER_PARSERS): 3495 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3496 if placeholder: 3497 return placeholder 3498 self._advance(-1) 3499 return None 3500 3501 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3502 if not self._match(TokenType.EXCEPT): 3503 return None 3504 if self._match(TokenType.L_PAREN, advance=False): 3505 return self._parse_wrapped_csv(self._parse_column) 3506 return self._parse_csv(self._parse_column) 3507 3508 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3509 if not self._match(TokenType.REPLACE): 3510 return None 3511 if self._match(TokenType.L_PAREN, advance=False): 3512 return self._parse_wrapped_csv(self._parse_expression) 3513 return self._parse_csv(self._parse_expression) 3514 3515 def _parse_csv( 3516 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3517 ) -> t.List[t.Optional[exp.Expression]]: 3518 
parse_result = parse_method() 3519 items = [parse_result] if parse_result is not None else [] 3520 3521 while self._match(sep): 3522 if parse_result and self._prev_comments: 3523 parse_result.comments = self._prev_comments 3524 3525 parse_result = parse_method() 3526 if parse_result is not None: 3527 items.append(parse_result) 3528 3529 return items 3530 3531 def _parse_tokens( 3532 self, parse_method: t.Callable, expressions: t.Dict 3533 ) -> t.Optional[exp.Expression]: 3534 this = parse_method() 3535 3536 while self._match_set(expressions): 3537 this = self.expression( 3538 expressions[self._prev.token_type], 3539 this=this, 3540 comments=self._prev_comments, 3541 expression=parse_method(), 3542 ) 3543 3544 return this 3545 3546 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3547 return self._parse_wrapped_csv(self._parse_id_var) 3548 3549 def _parse_wrapped_csv( 3550 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3551 ) -> t.List[t.Optional[exp.Expression]]: 3552 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3553 3554 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3555 self._match_l_paren() 3556 parse_result = parse_method() 3557 self._match_r_paren() 3558 return parse_result 3559 3560 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3561 return self._parse_select() or self._parse_expression() 3562 3563 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3564 return self._parse_set_operations( 3565 self._parse_select(nested=True, parse_subquery_alias=False) 3566 ) 3567 3568 def _parse_transaction(self) -> exp.Expression: 3569 this = None 3570 if self._match_texts(self.TRANSACTION_KIND): 3571 this = self._prev.text 3572 3573 self._match_texts({"TRANSACTION", "WORK"}) 3574 3575 modes = [] 3576 while True: 3577 mode = [] 3578 while self._match(TokenType.VAR): 3579 mode.append(self._prev.text) 3580 3581 if mode: 3582 modes.append(" ".join(mode)) 3583 if 
not self._match(TokenType.COMMA): 3584 break 3585 3586 return self.expression(exp.Transaction, this=this, modes=modes) 3587 3588 def _parse_commit_or_rollback(self) -> exp.Expression: 3589 chain = None 3590 savepoint = None 3591 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3592 3593 self._match_texts({"TRANSACTION", "WORK"}) 3594 3595 if self._match_text_seq("TO"): 3596 self._match_text_seq("SAVEPOINT") 3597 savepoint = self._parse_id_var() 3598 3599 if self._match(TokenType.AND): 3600 chain = not self._match_text_seq("NO") 3601 self._match_text_seq("CHAIN") 3602 3603 if is_rollback: 3604 return self.expression(exp.Rollback, savepoint=savepoint) 3605 return self.expression(exp.Commit, chain=chain) 3606 3607 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3608 if not self._match_text_seq("ADD"): 3609 return None 3610 3611 self._match(TokenType.COLUMN) 3612 exists_column = self._parse_exists(not_=True) 3613 expression = self._parse_column_def(self._parse_field(any_token=True)) 3614 3615 if expression: 3616 expression.set("exists", exists_column) 3617 3618 return expression 3619 3620 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3621 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3622 3623 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3624 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3625 return self.expression( 3626 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3627 ) 3628 3629 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3630 this = None 3631 kind = self._prev.token_type 3632 3633 if kind == TokenType.CONSTRAINT: 3634 this = self._parse_id_var() 3635 3636 if self._match_text_seq("CHECK"): 3637 expression = self._parse_wrapped(self._parse_conjunction) 3638 enforced = self._match_text_seq("ENFORCED") 3639 3640 return self.expression( 3641 exp.AddConstraint, this=this, 
expression=expression, enforced=enforced 3642 ) 3643 3644 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3645 expression = self._parse_foreign_key() 3646 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3647 expression = self._parse_primary_key() 3648 3649 return self.expression(exp.AddConstraint, this=this, expression=expression) 3650 3651 def _parse_alter(self) -> t.Optional[exp.Expression]: 3652 if not self._match(TokenType.TABLE): 3653 return self._parse_as_command(self._prev) 3654 3655 exists = self._parse_exists() 3656 this = self._parse_table(schema=True) 3657 3658 actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None 3659 3660 index = self._index 3661 if self._match(TokenType.DELETE): 3662 actions = [self.expression(exp.Delete, where=self._parse_where())] 3663 elif self._match_text_seq("ADD"): 3664 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3665 actions = self._parse_csv(self._parse_add_constraint) 3666 else: 3667 self._retreat(index) 3668 actions = self._parse_csv(self._parse_add_column) 3669 elif self._match_text_seq("DROP"): 3670 partition_exists = self._parse_exists() 3671 3672 if self._match(TokenType.PARTITION, advance=False): 3673 actions = self._parse_csv( 3674 lambda: self._parse_drop_partition(exists=partition_exists) 3675 ) 3676 else: 3677 self._retreat(index) 3678 actions = self._parse_csv(self._parse_drop_column) 3679 elif self._match_text_seq("RENAME", "TO"): 3680 actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3681 elif self._match_text_seq("ALTER"): 3682 self._match(TokenType.COLUMN) 3683 column = self._parse_field(any_token=True) 3684 3685 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3686 actions = self.expression(exp.AlterColumn, this=column, drop=True) 3687 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3688 actions = self.expression( 3689 exp.AlterColumn, this=column, default=self._parse_conjunction() 3690 
) 3691 else: 3692 self._match_text_seq("SET", "DATA") 3693 actions = self.expression( 3694 exp.AlterColumn, 3695 this=column, 3696 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3697 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3698 using=self._match(TokenType.USING) and self._parse_conjunction(), 3699 ) 3700 3701 actions = ensure_list(actions) 3702 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) 3703 3704 def _parse_show(self) -> t.Optional[exp.Expression]: 3705 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3706 if parser: 3707 return parser(self) 3708 self._advance() 3709 return self.expression(exp.Show, this=self._prev.text.upper()) 3710 3711 def _default_parse_set_item(self) -> exp.Expression: 3712 return self.expression( 3713 exp.SetItem, 3714 this=self._parse_statement(), 3715 ) 3716 3717 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3718 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3719 return parser(self) if parser else self._default_parse_set_item() 3720 3721 def _parse_merge(self) -> exp.Expression: 3722 self._match(TokenType.INTO) 3723 target = self._parse_table() 3724 3725 self._match(TokenType.USING) 3726 using = self._parse_table() 3727 3728 self._match(TokenType.ON) 3729 on = self._parse_conjunction() 3730 3731 whens = [] 3732 while self._match(TokenType.WHEN): 3733 this = self._parse_conjunction() 3734 self._match(TokenType.THEN) 3735 3736 if self._match(TokenType.INSERT): 3737 _this = self._parse_star() 3738 if _this: 3739 then = self.expression(exp.Insert, this=_this) 3740 else: 3741 then = self.expression( 3742 exp.Insert, 3743 this=self._parse_value(), 3744 expression=self._match(TokenType.VALUES) and self._parse_value(), 3745 ) 3746 elif self._match(TokenType.UPDATE): 3747 expressions = self._parse_star() 3748 if expressions: 3749 then = self.expression(exp.Update, expressions=expressions) 3750 else: 3751 
then = self.expression( 3752 exp.Update, 3753 expressions=self._match(TokenType.SET) 3754 and self._parse_csv(self._parse_equality), 3755 ) 3756 elif self._match(TokenType.DELETE): 3757 then = self.expression(exp.Var, this=self._prev.text) 3758 3759 whens.append(self.expression(exp.When, this=this, then=then)) 3760 3761 return self.expression( 3762 exp.Merge, 3763 this=target, 3764 using=using, 3765 on=on, 3766 expressions=whens, 3767 ) 3768 3769 def _parse_set(self) -> exp.Expression: 3770 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3771 3772 def _parse_as_command(self, start: Token) -> exp.Command: 3773 while self._curr: 3774 self._advance() 3775 return exp.Command(this=self._find_sql(start, self._prev)) 3776 3777 def _find_parser( 3778 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3779 ) -> t.Optional[t.Callable]: 3780 index = self._index 3781 this = [] 3782 while True: 3783 # The current token might be multiple words 3784 curr = self._curr.text.upper() 3785 key = curr.split(" ") 3786 this.append(curr) 3787 self._advance() 3788 result, trie = in_trie(trie, key) 3789 if result == 0: 3790 break 3791 if result == 2: 3792 subparser = parsers[" ".join(this)] 3793 return subparser 3794 self._retreat(index) 3795 return None 3796 3797 def _match(self, token_type, advance=True): 3798 if not self._curr: 3799 return None 3800 3801 if self._curr.token_type == token_type: 3802 if advance: 3803 self._advance() 3804 return True 3805 3806 return None 3807 3808 def _match_set(self, types, advance=True): 3809 if not self._curr: 3810 return None 3811 3812 if self._curr.token_type in types: 3813 if advance: 3814 self._advance() 3815 return True 3816 3817 return None 3818 3819 def _match_pair(self, token_type_a, token_type_b, advance=True): 3820 if not self._curr or not self._next: 3821 return None 3822 3823 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3824 if advance: 3825 self._advance(2) 3826 
return True 3827 3828 return None 3829 3830 def _match_l_paren(self, expression=None): 3831 if not self._match(TokenType.L_PAREN): 3832 self.raise_error("Expecting (") 3833 if expression and self._prev_comments: 3834 expression.comments = self._prev_comments 3835 3836 def _match_r_paren(self, expression=None): 3837 if not self._match(TokenType.R_PAREN): 3838 self.raise_error("Expecting )") 3839 if expression and self._prev_comments: 3840 expression.comments = self._prev_comments 3841 3842 def _match_texts(self, texts, advance=True): 3843 if self._curr and self._curr.text.upper() in texts: 3844 if advance: 3845 self._advance() 3846 return True 3847 return False 3848 3849 def _match_text_seq(self, *texts, advance=True): 3850 index = self._index 3851 for text in texts: 3852 if self._curr and self._curr.text.upper() == text: 3853 self._advance() 3854 else: 3855 self._retreat(index) 3856 return False 3857 3858 if not advance: 3859 self._retreat(index) 3860 3861 return True 3862 3863 def _replace_columns_with_dots(self, this): 3864 if isinstance(this, exp.Dot): 3865 exp.replace_children(this, self._replace_columns_with_dots) 3866 elif isinstance(this, exp.Column): 3867 exp.replace_children(this, self._replace_columns_with_dots) 3868 table = this.args.get("table") 3869 this = ( 3870 self.expression(exp.Dot, this=table, expression=this.this) 3871 if table 3872 else self.expression(exp.Var, this=this.name) 3873 ) 3874 elif isinstance(this, exp.Identifier): 3875 this = self.expression(exp.Var, this=this.name) 3876 return this 3877 3878 def _replace_lambda(self, node, lambda_variables): 3879 if isinstance(node, exp.Column): 3880 if node.name in lambda_variables: 3881 return node.this 3882 return node
43class Parser(metaclass=_Parser): 44 """ 45 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 46 a parsed syntax tree. 47 48 Args: 49 error_level: the desired error level. 50 Default: ErrorLevel.RAISE 51 error_message_context: determines the amount of context to capture from a 52 query string when displaying the error message (in number of characters). 53 Default: 50. 54 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 55 Default: 0 56 alias_post_tablesample: If the table alias comes after tablesample. 57 Default: False 58 max_errors: Maximum number of error messages to include in a raised ParseError. 59 This is only relevant if error_level is ErrorLevel.RAISE. 60 Default: 3 61 null_ordering: Indicates the default null ordering method to use if not explicitly set. 62 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 63 Default: "nulls_are_small" 64 """ 65 66 FUNCTIONS: t.Dict[str, t.Callable] = { 67 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 68 "DATE_TO_DATE_STR": lambda args: exp.Cast( 69 this=seq_get(args, 0), 70 to=exp.DataType(this=exp.DataType.Type.TEXT), 71 ), 72 "TIME_TO_TIME_STR": lambda args: exp.Cast( 73 this=seq_get(args, 0), 74 to=exp.DataType(this=exp.DataType.Type.TEXT), 75 ), 76 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 77 this=exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 start=exp.Literal.number(1), 82 length=exp.Literal.number(10), 83 ), 84 "VAR_MAP": parse_var_map, 85 "IFNULL": exp.Coalesce.from_arg_list, 86 } 87 88 NO_PAREN_FUNCTIONS = { 89 TokenType.CURRENT_DATE: exp.CurrentDate, 90 TokenType.CURRENT_DATETIME: exp.CurrentDate, 91 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 92 } 93 94 NESTED_TYPE_TOKENS = { 95 TokenType.ARRAY, 96 TokenType.MAP, 97 TokenType.STRUCT, 98 TokenType.NULLABLE, 99 } 100 101 TYPE_TOKENS = { 102 TokenType.BOOLEAN, 103 
TokenType.TINYINT, 104 TokenType.SMALLINT, 105 TokenType.INT, 106 TokenType.BIGINT, 107 TokenType.FLOAT, 108 TokenType.DOUBLE, 109 TokenType.CHAR, 110 TokenType.NCHAR, 111 TokenType.VARCHAR, 112 TokenType.NVARCHAR, 113 TokenType.TEXT, 114 TokenType.MEDIUMTEXT, 115 TokenType.LONGTEXT, 116 TokenType.MEDIUMBLOB, 117 TokenType.LONGBLOB, 118 TokenType.BINARY, 119 TokenType.VARBINARY, 120 TokenType.JSON, 121 TokenType.JSONB, 122 TokenType.INTERVAL, 123 TokenType.TIME, 124 TokenType.TIMESTAMP, 125 TokenType.TIMESTAMPTZ, 126 TokenType.TIMESTAMPLTZ, 127 TokenType.DATETIME, 128 TokenType.DATE, 129 TokenType.DECIMAL, 130 TokenType.UUID, 131 TokenType.GEOGRAPHY, 132 TokenType.GEOMETRY, 133 TokenType.HLLSKETCH, 134 TokenType.HSTORE, 135 TokenType.PSEUDO_TYPE, 136 TokenType.SUPER, 137 TokenType.SERIAL, 138 TokenType.SMALLSERIAL, 139 TokenType.BIGSERIAL, 140 TokenType.XML, 141 TokenType.UNIQUEIDENTIFIER, 142 TokenType.MONEY, 143 TokenType.SMALLMONEY, 144 TokenType.ROWVERSION, 145 TokenType.IMAGE, 146 TokenType.VARIANT, 147 TokenType.OBJECT, 148 TokenType.INET, 149 *NESTED_TYPE_TOKENS, 150 } 151 152 SUBQUERY_PREDICATES = { 153 TokenType.ANY: exp.Any, 154 TokenType.ALL: exp.All, 155 TokenType.EXISTS: exp.Exists, 156 TokenType.SOME: exp.Any, 157 } 158 159 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 160 161 ID_VAR_TOKENS = { 162 TokenType.VAR, 163 TokenType.ANTI, 164 TokenType.APPLY, 165 TokenType.AUTO_INCREMENT, 166 TokenType.BEGIN, 167 TokenType.BOTH, 168 TokenType.BUCKET, 169 TokenType.CACHE, 170 TokenType.CASCADE, 171 TokenType.COLLATE, 172 TokenType.COLUMN, 173 TokenType.COMMAND, 174 TokenType.COMMIT, 175 TokenType.COMPOUND, 176 TokenType.CONSTRAINT, 177 TokenType.CURRENT_TIME, 178 TokenType.DEFAULT, 179 TokenType.DELETE, 180 TokenType.DESCRIBE, 181 TokenType.DIV, 182 TokenType.END, 183 TokenType.EXECUTE, 184 TokenType.ESCAPE, 185 TokenType.FALSE, 186 TokenType.FIRST, 187 TokenType.FILTER, 188 TokenType.FOLLOWING, 189 TokenType.FORMAT, 190 
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed as table aliases; the removed entries would be
    # ambiguous after a table reference (e.g. `t LEFT JOIN` vs alias `LEFT`).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # SET would be ambiguous with UPDATE's SET clause.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid TRIM(<type> ... FROM ...) qualifiers.
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The following token -> expression maps define binary-operator precedence tiers,
    # from lowest (CONJUNCTION) to highest (FACTOR); each tier is consumed by the
    # corresponding _parse_* method in the precedence-climbing chain.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `args -> body` builds an exp.Lambda (rewriting references to the
    # lambda's parameters via _replace_lambda); `name => value` builds a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that bind directly to a column: `::` casts and the JSON/JSONB
    # extraction arrows. DOT maps to None because dotted paths get special handling.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser entry point; used by parse_into to parse a token
    # stream directly into a specific node type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> parser for the whole statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE | WAREHOUSE | DATABASE | SCHEMA] <name>; the optional kind keyword
        # is captured from _prev only when one of the four texts matched.
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal tokens -> expression builders; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes: `?`, `@param` / `$param`, and `:name` / `:1`.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Infix predicate operators that take the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # DDL property keyword -> parser. Several entries (AFTER, BEFORE, FALLBACK, ...)
    # read self._prev because a NO/DUAL/DEFAULT prefix token was consumed before dispatch.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser, used when parsing column definitions.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Constraints that may appear in a schema definition without a preceding column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not followed by parentheses in the usual way.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need custom (non-CSV) parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # SELECT-modifier clause name -> parser; consulted by _parse_query_modifiers.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Dialect hooks; empty by default, populated by subclasses (tries built in _Parser).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted after CREATE/DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # ROWS would be ambiguous with a window frame specification.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST/CONVERT parse as strict casts; dialects may override.
    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for the meaning of each setting.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clears all per-parse state (sql text, errors, token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # Every candidate type failed: surface all collected errors, chained to the last one.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs parse_method on each chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not produce an empty extra chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the parser stopped before consuming the statement.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Pending comments from the most recent token are attached here and then cleared;
        # explicitly passed comments take precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning from the start token to the end of the end token.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Converts the token's (line, col) coordinates into an absolute offset into self.sql
        # by scanning for line breaks. O(offset) per call; only used for error reporting.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev window plus pending comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index.
        self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback for statements we don't model: keep the keyword and the raw remainder.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Dispatch order: registered statement parsers, then raw commands, then expressions.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                # Unknown object kind: fall back to an opaque Command node.
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        start = self._prev
        # CREATE OR REPLACE: REPLACE may already be the consumed token, or follow as OR REPLACE.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...) (e.g. BigQuery): consume TABLE so FUNCTION dispatches.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            replace=replace,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Variant of _parse_property for comma-separated properties appearing before
        # the schema (Teradata style).
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): assumes a current token exists here; if the statement ends after
        # one of the prefixes above, self._curr would be None — confirm against callers.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Tries, in order: registered keyword parsers, special two-token forms, and
        # generic `key = value` assignments. Returns None when nothing matches.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Parses `[= | AS] <value>` into the given single-value property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collects consecutive properties into an exp.Properties node; a single parsed
        # item may expand into several properties (hence ensure_collection).
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # Teradata [NO] FALLBACK [PROTECTION]; `no` reflects an already-consumed NO prefix.
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) property list, or one of the Teradata WITH forms
        # (JOURNAL, [NO] DATA, isolated loading).
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
self._match(TokenType.PARAMETER) 1210 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1211 1212 if not user or not host: 1213 return None 1214 1215 return exp.DefinerProperty(this=f"{user}@{host}") 1216 1217 def _parse_withjournaltable(self) -> exp.Expression: 1218 self._match(TokenType.TABLE) 1219 self._match(TokenType.EQ) 1220 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1221 1222 def _parse_log(self, no=False) -> exp.Expression: 1223 self._match_text_seq("LOG") 1224 return self.expression(exp.LogProperty, no=no) 1225 1226 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1227 before = self._match_text_seq("BEFORE") 1228 self._match_text_seq("JOURNAL") 1229 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1230 1231 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1232 self._match_text_seq("NOT") 1233 self._match_text_seq("LOCAL") 1234 self._match_text_seq("AFTER", "JOURNAL") 1235 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1236 1237 def _parse_checksum(self) -> exp.Expression: 1238 self._match_text_seq("CHECKSUM") 1239 self._match(TokenType.EQ) 1240 1241 on = None 1242 if self._match(TokenType.ON): 1243 on = True 1244 elif self._match_text_seq("OFF"): 1245 on = False 1246 default = self._match(TokenType.DEFAULT) 1247 1248 return self.expression( 1249 exp.ChecksumProperty, 1250 on=on, 1251 default=default, 1252 ) 1253 1254 def _parse_freespace(self) -> exp.Expression: 1255 self._match_text_seq("FREESPACE") 1256 self._match(TokenType.EQ) 1257 return self.expression( 1258 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1259 ) 1260 1261 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1262 self._match_text_seq("MERGEBLOCKRATIO") 1263 if self._match(TokenType.EQ): 1264 return self.expression( 1265 
exp.MergeBlockRatioProperty, 1266 this=self._parse_number(), 1267 percent=self._match(TokenType.PERCENT), 1268 ) 1269 else: 1270 return self.expression( 1271 exp.MergeBlockRatioProperty, 1272 no=no, 1273 default=default, 1274 ) 1275 1276 def _parse_datablocksize(self, default=None) -> exp.Expression: 1277 if default: 1278 self._match_text_seq("DATABLOCKSIZE") 1279 return self.expression(exp.DataBlocksizeProperty, default=True) 1280 elif self._match_texts(("MIN", "MINIMUM")): 1281 self._match_text_seq("DATABLOCKSIZE") 1282 return self.expression(exp.DataBlocksizeProperty, min=True) 1283 elif self._match_texts(("MAX", "MAXIMUM")): 1284 self._match_text_seq("DATABLOCKSIZE") 1285 return self.expression(exp.DataBlocksizeProperty, min=False) 1286 1287 self._match_text_seq("DATABLOCKSIZE") 1288 self._match(TokenType.EQ) 1289 size = self._parse_number() 1290 units = None 1291 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1292 units = self._prev.text 1293 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1294 1295 def _parse_blockcompression(self) -> exp.Expression: 1296 self._match_text_seq("BLOCKCOMPRESSION") 1297 self._match(TokenType.EQ) 1298 always = self._match_text_seq("ALWAYS") 1299 manual = self._match_text_seq("MANUAL") 1300 never = self._match_text_seq("NEVER") 1301 default = self._match_text_seq("DEFAULT") 1302 autotemp = None 1303 if self._match_text_seq("AUTOTEMP"): 1304 autotemp = self._parse_schema() 1305 1306 return self.expression( 1307 exp.BlockCompressionProperty, 1308 always=always, 1309 manual=manual, 1310 never=never, 1311 default=default, 1312 autotemp=autotemp, 1313 ) 1314 1315 def _parse_withisolatedloading(self) -> exp.Expression: 1316 no = self._match_text_seq("NO") 1317 concurrent = self._match_text_seq("CONCURRENT") 1318 self._match_text_seq("ISOLATED", "LOADING") 1319 for_all = self._match_text_seq("FOR", "ALL") 1320 for_insert = self._match_text_seq("FOR", "INSERT") 1321 for_none = self._match_text_seq("FOR", 
"NONE") 1322 return self.expression( 1323 exp.IsolatedLoadingProperty, 1324 no=no, 1325 concurrent=concurrent, 1326 for_all=for_all, 1327 for_insert=for_insert, 1328 for_none=for_none, 1329 ) 1330 1331 def _parse_locking(self) -> exp.Expression: 1332 if self._match(TokenType.TABLE): 1333 kind = "TABLE" 1334 elif self._match(TokenType.VIEW): 1335 kind = "VIEW" 1336 elif self._match(TokenType.ROW): 1337 kind = "ROW" 1338 elif self._match_text_seq("DATABASE"): 1339 kind = "DATABASE" 1340 else: 1341 kind = None 1342 1343 if kind in ("DATABASE", "TABLE", "VIEW"): 1344 this = self._parse_table_parts() 1345 else: 1346 this = None 1347 1348 if self._match(TokenType.FOR): 1349 for_or_in = "FOR" 1350 elif self._match(TokenType.IN): 1351 for_or_in = "IN" 1352 else: 1353 for_or_in = None 1354 1355 if self._match_text_seq("ACCESS"): 1356 lock_type = "ACCESS" 1357 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1358 lock_type = "EXCLUSIVE" 1359 elif self._match_text_seq("SHARE"): 1360 lock_type = "SHARE" 1361 elif self._match_text_seq("READ"): 1362 lock_type = "READ" 1363 elif self._match_text_seq("WRITE"): 1364 lock_type = "WRITE" 1365 elif self._match_text_seq("CHECKSUM"): 1366 lock_type = "CHECKSUM" 1367 else: 1368 lock_type = None 1369 1370 override = self._match_text_seq("OVERRIDE") 1371 1372 return self.expression( 1373 exp.LockingProperty, 1374 this=this, 1375 kind=kind, 1376 for_or_in=for_or_in, 1377 lock_type=lock_type, 1378 override=override, 1379 ) 1380 1381 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1382 if self._match(TokenType.PARTITION_BY): 1383 return self._parse_csv(self._parse_conjunction) 1384 return [] 1385 1386 def _parse_partitioned_by(self) -> exp.Expression: 1387 self._match(TokenType.EQ) 1388 return self.expression( 1389 exp.PartitionedByProperty, 1390 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1391 ) 1392 1393 def _parse_withdata(self, no=False) -> exp.Expression: 1394 if self._match_text_seq("AND", 
"STATISTICS"): 1395 statistics = True 1396 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1397 statistics = False 1398 else: 1399 statistics = None 1400 1401 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1402 1403 def _parse_noprimaryindex(self) -> exp.Expression: 1404 self._match_text_seq("PRIMARY", "INDEX") 1405 return exp.NoPrimaryIndexProperty() 1406 1407 def _parse_oncommit(self) -> exp.Expression: 1408 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1409 return exp.OnCommitProperty() 1410 1411 def _parse_distkey(self) -> exp.Expression: 1412 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1413 1414 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1415 table = self._parse_table(schema=True) 1416 options = [] 1417 while self._match_texts(("INCLUDING", "EXCLUDING")): 1418 this = self._prev.text.upper() 1419 id_var = self._parse_id_var() 1420 1421 if not id_var: 1422 return None 1423 1424 options.append( 1425 self.expression( 1426 exp.Property, 1427 this=this, 1428 value=exp.Var(this=id_var.this.upper()), 1429 ) 1430 ) 1431 return self.expression(exp.LikeProperty, this=table, expressions=options) 1432 1433 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1434 return self.expression( 1435 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1436 ) 1437 1438 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1439 self._match(TokenType.EQ) 1440 return self.expression( 1441 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1442 ) 1443 1444 def _parse_returns(self) -> exp.Expression: 1445 value: t.Optional[exp.Expression] 1446 is_table = self._match(TokenType.TABLE) 1447 1448 if is_table: 1449 if self._match(TokenType.LT): 1450 value = self.expression( 1451 exp.Schema, 1452 this="TABLE", 1453 expressions=self._parse_csv(self._parse_struct_kwargs), 1454 ) 1455 if not 
self._match(TokenType.GT): 1456 self.raise_error("Expecting >") 1457 else: 1458 value = self._parse_schema(exp.Var(this="TABLE")) 1459 else: 1460 value = self._parse_types() 1461 1462 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1463 1464 def _parse_temporary(self, global_=False) -> exp.Expression: 1465 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1466 return self.expression(exp.TemporaryProperty, global_=global_) 1467 1468 def _parse_describe(self) -> exp.Expression: 1469 kind = self._match_set(self.CREATABLES) and self._prev.text 1470 this = self._parse_table() 1471 1472 return self.expression(exp.Describe, this=this, kind=kind) 1473 1474 def _parse_insert(self) -> exp.Expression: 1475 overwrite = self._match(TokenType.OVERWRITE) 1476 local = self._match(TokenType.LOCAL) 1477 1478 this: t.Optional[exp.Expression] 1479 1480 alternative = None 1481 if self._match_text_seq("DIRECTORY"): 1482 this = self.expression( 1483 exp.Directory, 1484 this=self._parse_var_or_string(), 1485 local=local, 1486 row_format=self._parse_row_format(match_row=True), 1487 ) 1488 else: 1489 if self._match(TokenType.OR): 1490 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1491 1492 self._match(TokenType.INTO) 1493 self._match(TokenType.TABLE) 1494 this = self._parse_table(schema=True) 1495 1496 return self.expression( 1497 exp.Insert, 1498 this=this, 1499 exists=self._parse_exists(), 1500 partition=self._parse_partition(), 1501 expression=self._parse_ddl_select(), 1502 overwrite=overwrite, 1503 alternative=alternative, 1504 ) 1505 1506 def _parse_row(self) -> t.Optional[exp.Expression]: 1507 if not self._match(TokenType.FORMAT): 1508 return None 1509 return self._parse_row_format() 1510 1511 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1512 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1513 return None 1514 1515 if self._match_text_seq("SERDE"): 
1516 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1517 1518 self._match_text_seq("DELIMITED") 1519 1520 kwargs = {} 1521 1522 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1523 kwargs["fields"] = self._parse_string() 1524 if self._match_text_seq("ESCAPED", "BY"): 1525 kwargs["escaped"] = self._parse_string() 1526 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1527 kwargs["collection_items"] = self._parse_string() 1528 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1529 kwargs["map_keys"] = self._parse_string() 1530 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1531 kwargs["lines"] = self._parse_string() 1532 if self._match_text_seq("NULL", "DEFINED", "AS"): 1533 kwargs["null"] = self._parse_string() 1534 1535 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1536 1537 def _parse_load_data(self) -> exp.Expression: 1538 local = self._match(TokenType.LOCAL) 1539 self._match_text_seq("INPATH") 1540 inpath = self._parse_string() 1541 overwrite = self._match(TokenType.OVERWRITE) 1542 self._match_pair(TokenType.INTO, TokenType.TABLE) 1543 1544 return self.expression( 1545 exp.LoadData, 1546 this=self._parse_table(schema=True), 1547 local=local, 1548 overwrite=overwrite, 1549 inpath=inpath, 1550 partition=self._parse_partition(), 1551 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1552 serde=self._match_text_seq("SERDE") and self._parse_string(), 1553 ) 1554 1555 def _parse_delete(self) -> exp.Expression: 1556 self._match(TokenType.FROM) 1557 1558 return self.expression( 1559 exp.Delete, 1560 this=self._parse_table(schema=True), 1561 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1562 where=self._parse_where(), 1563 ) 1564 1565 def _parse_update(self) -> exp.Expression: 1566 return self.expression( 1567 exp.Update, 1568 **{ # type: ignore 1569 "this": 
self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1570 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1571 "from": self._parse_from(), 1572 "where": self._parse_where(), 1573 }, 1574 ) 1575 1576 def _parse_uncache(self) -> exp.Expression: 1577 if not self._match(TokenType.TABLE): 1578 self.raise_error("Expecting TABLE after UNCACHE") 1579 1580 return self.expression( 1581 exp.Uncache, 1582 exists=self._parse_exists(), 1583 this=self._parse_table(schema=True), 1584 ) 1585 1586 def _parse_cache(self) -> exp.Expression: 1587 lazy = self._match(TokenType.LAZY) 1588 self._match(TokenType.TABLE) 1589 table = self._parse_table(schema=True) 1590 options = [] 1591 1592 if self._match(TokenType.OPTIONS): 1593 self._match_l_paren() 1594 k = self._parse_string() 1595 self._match(TokenType.EQ) 1596 v = self._parse_string() 1597 options = [k, v] 1598 self._match_r_paren() 1599 1600 self._match(TokenType.ALIAS) 1601 return self.expression( 1602 exp.Cache, 1603 this=table, 1604 lazy=lazy, 1605 options=options, 1606 expression=self._parse_select(nested=True), 1607 ) 1608 1609 def _parse_partition(self) -> t.Optional[exp.Expression]: 1610 if not self._match(TokenType.PARTITION): 1611 return None 1612 1613 return self.expression( 1614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1615 ) 1616 1617 def _parse_value(self) -> exp.Expression: 1618 if self._match(TokenType.L_PAREN): 1619 expressions = self._parse_csv(self._parse_conjunction) 1620 self._match_r_paren() 1621 return self.expression(exp.Tuple, expressions=expressions) 1622 1623 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1624 # Source: https://prestodb.io/docs/current/sql/values.html 1625 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1626 1627 def _parse_select( 1628 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1629 ) -> t.Optional[exp.Expression]: 1630 cte = self._parse_with() 1631 if cte: 1632 this = self._parse_statement() 1633 1634 if not this: 1635 self.raise_error("Failed to parse any statement following CTE") 1636 return cte 1637 1638 if "with" in this.arg_types: 1639 this.set("with", cte) 1640 else: 1641 self.raise_error(f"{this.key} does not support CTE") 1642 this = cte 1643 elif self._match(TokenType.SELECT): 1644 comments = self._prev_comments 1645 1646 hint = self._parse_hint() 1647 all_ = self._match(TokenType.ALL) 1648 distinct = self._match(TokenType.DISTINCT) 1649 1650 if distinct: 1651 distinct = self.expression( 1652 exp.Distinct, 1653 on=self._parse_value() if self._match(TokenType.ON) else None, 1654 ) 1655 1656 if all_ and distinct: 1657 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1658 1659 limit = self._parse_limit(top=True) 1660 expressions = self._parse_csv(self._parse_expression) 1661 1662 this = self.expression( 1663 exp.Select, 1664 hint=hint, 1665 distinct=distinct, 1666 expressions=expressions, 1667 limit=limit, 1668 ) 1669 this.comments = comments 1670 1671 into = self._parse_into() 1672 if into: 1673 this.set("into", into) 1674 1675 from_ = self._parse_from() 1676 if from_: 1677 this.set("from", from_) 1678 1679 self._parse_query_modifiers(this) 1680 elif (table or nested) and self._match(TokenType.L_PAREN): 1681 this = self._parse_table() if table else self._parse_select(nested=True) 1682 self._parse_query_modifiers(this) 1683 this = self._parse_set_operations(this) 1684 self._match_r_paren() 1685 1686 # early return so that subquery unions aren't parsed again 1687 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1688 # Union ALL should be a property of the top 
select node, not the subquery 1689 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1690 elif self._match(TokenType.VALUES): 1691 this = self.expression( 1692 exp.Values, 1693 expressions=self._parse_csv(self._parse_value), 1694 alias=self._parse_table_alias(), 1695 ) 1696 else: 1697 this = None 1698 1699 return self._parse_set_operations(this) 1700 1701 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1702 if not skip_with_token and not self._match(TokenType.WITH): 1703 return None 1704 1705 recursive = self._match(TokenType.RECURSIVE) 1706 1707 expressions = [] 1708 while True: 1709 expressions.append(self._parse_cte()) 1710 1711 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1712 break 1713 else: 1714 self._match(TokenType.WITH) 1715 1716 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1717 1718 def _parse_cte(self) -> exp.Expression: 1719 alias = self._parse_table_alias() 1720 if not alias or not alias.this: 1721 self.raise_error("Expected CTE to have alias") 1722 1723 self._match(TokenType.ALIAS) 1724 1725 return self.expression( 1726 exp.CTE, 1727 this=self._parse_wrapped(self._parse_statement), 1728 alias=alias, 1729 ) 1730 1731 def _parse_table_alias( 1732 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1733 ) -> t.Optional[exp.Expression]: 1734 any_token = self._match(TokenType.ALIAS) 1735 alias = self._parse_id_var( 1736 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1737 ) 1738 index = self._index 1739 1740 if self._match(TokenType.L_PAREN): 1741 columns = self._parse_csv(self._parse_function_parameter) 1742 self._match_r_paren() if columns else self._retreat(index) 1743 else: 1744 columns = None 1745 1746 if not alias and not columns: 1747 return None 1748 1749 return self.expression(exp.TableAlias, this=alias, columns=columns) 1750 1751 def _parse_subquery( 1752 self, this: t.Optional[exp.Expression], 
parse_alias: bool = True 1753 ) -> exp.Expression: 1754 return self.expression( 1755 exp.Subquery, 1756 this=this, 1757 pivots=self._parse_pivots(), 1758 alias=self._parse_table_alias() if parse_alias else None, 1759 ) 1760 1761 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1762 if not isinstance(this, self.MODIFIABLES): 1763 return 1764 1765 table = isinstance(this, exp.Table) 1766 1767 while True: 1768 lateral = self._parse_lateral() 1769 join = self._parse_join() 1770 comma = None if table else self._match(TokenType.COMMA) 1771 if lateral: 1772 this.append("laterals", lateral) 1773 if join: 1774 this.append("joins", join) 1775 if comma: 1776 this.args["from"].append("expressions", self._parse_table()) 1777 if not (lateral or join or comma): 1778 break 1779 1780 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1781 expression = parser(self) 1782 1783 if expression: 1784 this.set(key, expression) 1785 1786 def _parse_hint(self) -> t.Optional[exp.Expression]: 1787 if self._match(TokenType.HINT): 1788 hints = self._parse_csv(self._parse_function) 1789 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1790 self.raise_error("Expected */ after HINT") 1791 return self.expression(exp.Hint, expressions=hints) 1792 1793 return None 1794 1795 def _parse_into(self) -> t.Optional[exp.Expression]: 1796 if not self._match(TokenType.INTO): 1797 return None 1798 1799 temp = self._match(TokenType.TEMPORARY) 1800 unlogged = self._match(TokenType.UNLOGGED) 1801 self._match(TokenType.TABLE) 1802 1803 return self.expression( 1804 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1805 ) 1806 1807 def _parse_from(self) -> t.Optional[exp.Expression]: 1808 if not self._match(TokenType.FROM): 1809 return None 1810 1811 return self.expression( 1812 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1813 ) 1814 1815 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 
1816 if not self._match(TokenType.MATCH_RECOGNIZE): 1817 return None 1818 self._match_l_paren() 1819 1820 partition = self._parse_partition_by() 1821 order = self._parse_order() 1822 measures = ( 1823 self._parse_alias(self._parse_conjunction()) 1824 if self._match_text_seq("MEASURES") 1825 else None 1826 ) 1827 1828 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1829 rows = exp.Var(this="ONE ROW PER MATCH") 1830 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1831 text = "ALL ROWS PER MATCH" 1832 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1833 text += f" SHOW EMPTY MATCHES" 1834 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1835 text += f" OMIT EMPTY MATCHES" 1836 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1837 text += f" WITH UNMATCHED ROWS" 1838 rows = exp.Var(this=text) 1839 else: 1840 rows = None 1841 1842 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1843 text = "AFTER MATCH SKIP" 1844 if self._match_text_seq("PAST", "LAST", "ROW"): 1845 text += f" PAST LAST ROW" 1846 elif self._match_text_seq("TO", "NEXT", "ROW"): 1847 text += f" TO NEXT ROW" 1848 elif self._match_text_seq("TO", "FIRST"): 1849 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1850 elif self._match_text_seq("TO", "LAST"): 1851 text += f" TO LAST {self._advance_any().text}" # type: ignore 1852 after = exp.Var(this=text) 1853 else: 1854 after = None 1855 1856 if self._match_text_seq("PATTERN"): 1857 self._match_l_paren() 1858 1859 if not self._curr: 1860 self.raise_error("Expecting )", self._curr) 1861 1862 paren = 1 1863 start = self._curr 1864 1865 while self._curr and paren > 0: 1866 if self._curr.token_type == TokenType.L_PAREN: 1867 paren += 1 1868 if self._curr.token_type == TokenType.R_PAREN: 1869 paren -= 1 1870 end = self._prev 1871 self._advance() 1872 if paren > 0: 1873 self.raise_error("Expecting )", self._curr) 1874 pattern = exp.Var(this=self._find_sql(start, end)) 1875 else: 1876 pattern = None 1877 1878 define = 
( 1879 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1880 ) 1881 self._match_r_paren() 1882 1883 return self.expression( 1884 exp.MatchRecognize, 1885 partition_by=partition, 1886 order=order, 1887 measures=measures, 1888 rows=rows, 1889 after=after, 1890 pattern=pattern, 1891 define=define, 1892 ) 1893 1894 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1895 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1896 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1897 1898 if outer_apply or cross_apply: 1899 this = self._parse_select(table=True) 1900 view = None 1901 outer = not cross_apply 1902 elif self._match(TokenType.LATERAL): 1903 this = self._parse_select(table=True) 1904 view = self._match(TokenType.VIEW) 1905 outer = self._match(TokenType.OUTER) 1906 else: 1907 return None 1908 1909 if not this: 1910 this = self._parse_function() or self._parse_id_var(any_token=False) 1911 while self._match(TokenType.DOT): 1912 this = exp.Dot( 1913 this=this, 1914 expression=self._parse_function() or self._parse_id_var(any_token=False), 1915 ) 1916 1917 table_alias: t.Optional[exp.Expression] 1918 1919 if view: 1920 table = self._parse_id_var(any_token=False) 1921 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1922 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1923 else: 1924 table_alias = self._parse_table_alias() 1925 1926 expression = self.expression( 1927 exp.Lateral, 1928 this=this, 1929 view=view, 1930 outer=outer, 1931 alias=table_alias, 1932 ) 1933 1934 if outer_apply or cross_apply: 1935 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1936 1937 return expression 1938 1939 def _parse_join_side_and_kind( 1940 self, 1941 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1942 return ( 1943 self._match(TokenType.NATURAL) and self._prev, 1944 
self._match_set(self.JOIN_SIDES) and self._prev, 1945 self._match_set(self.JOIN_KINDS) and self._prev, 1946 ) 1947 1948 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1949 natural, side, kind = self._parse_join_side_and_kind() 1950 1951 if not skip_join_token and not self._match(TokenType.JOIN): 1952 return None 1953 1954 kwargs: t.Dict[ 1955 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1956 ] = {"this": self._parse_table()} 1957 1958 if natural: 1959 kwargs["natural"] = True 1960 if side: 1961 kwargs["side"] = side.text 1962 if kind: 1963 kwargs["kind"] = kind.text 1964 1965 if self._match(TokenType.ON): 1966 kwargs["on"] = self._parse_conjunction() 1967 elif self._match(TokenType.USING): 1968 kwargs["using"] = self._parse_wrapped_id_vars() 1969 1970 return self.expression(exp.Join, **kwargs) # type: ignore 1971 1972 def _parse_index(self) -> exp.Expression: 1973 index = self._parse_id_var() 1974 self._match(TokenType.ON) 1975 self._match(TokenType.TABLE) # hive 1976 1977 return self.expression( 1978 exp.Index, 1979 this=index, 1980 table=self.expression(exp.Table, this=self._parse_id_var()), 1981 columns=self._parse_expression(), 1982 ) 1983 1984 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 1985 unique = self._match(TokenType.UNIQUE) 1986 primary = self._match_text_seq("PRIMARY") 1987 amp = self._match_text_seq("AMP") 1988 if not self._match(TokenType.INDEX): 1989 return None 1990 index = self._parse_id_var() 1991 columns = None 1992 if self._match(TokenType.L_PAREN, advance=False): 1993 columns = self._parse_wrapped_csv(self._parse_column) 1994 return self.expression( 1995 exp.Index, 1996 this=index, 1997 columns=columns, 1998 unique=unique, 1999 primary=primary, 2000 amp=amp, 2001 ) 2002 2003 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2004 catalog = None 2005 db = None 2006 table = (not schema and self._parse_function()) or 
self._parse_id_var(any_token=False) 2007 2008 while self._match(TokenType.DOT): 2009 if catalog: 2010 # This allows nesting the table in arbitrarily many dot expressions if needed 2011 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2012 else: 2013 catalog = db 2014 db = table 2015 table = self._parse_id_var() 2016 2017 if not table: 2018 self.raise_error(f"Expected table name but got {self._curr}") 2019 2020 return self.expression( 2021 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2022 ) 2023 2024 def _parse_table( 2025 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2026 ) -> t.Optional[exp.Expression]: 2027 lateral = self._parse_lateral() 2028 2029 if lateral: 2030 return lateral 2031 2032 unnest = self._parse_unnest() 2033 2034 if unnest: 2035 return unnest 2036 2037 values = self._parse_derived_table_values() 2038 2039 if values: 2040 return values 2041 2042 subquery = self._parse_select(table=True) 2043 2044 if subquery: 2045 return subquery 2046 2047 this = self._parse_table_parts(schema=schema) 2048 2049 if schema: 2050 return self._parse_schema(this=this) 2051 2052 if self.alias_post_tablesample: 2053 table_sample = self._parse_table_sample() 2054 2055 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2056 2057 if alias: 2058 this.set("alias", alias) 2059 2060 if not this.args.get("pivots"): 2061 this.set("pivots", self._parse_pivots()) 2062 2063 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2064 this.set( 2065 "hints", 2066 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2067 ) 2068 self._match_r_paren() 2069 2070 if not self.alias_post_tablesample: 2071 table_sample = self._parse_table_sample() 2072 2073 if table_sample: 2074 table_sample.set("this", this) 2075 this = table_sample 2076 2077 return this 2078 2079 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2080 if not 
self._match(TokenType.UNNEST): 2081 return None 2082 2083 expressions = self._parse_wrapped_csv(self._parse_column) 2084 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2085 alias = self._parse_table_alias() 2086 2087 if alias and self.unnest_column_only: 2088 if alias.args.get("columns"): 2089 self.raise_error("Unexpected extra column alias in unnest.") 2090 alias.set("columns", [alias.this]) 2091 alias.set("this", None) 2092 2093 offset = None 2094 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2095 self._match(TokenType.ALIAS) 2096 offset = self._parse_conjunction() 2097 2098 return self.expression( 2099 exp.Unnest, 2100 expressions=expressions, 2101 ordinality=ordinality, 2102 alias=alias, 2103 offset=offset, 2104 ) 2105 2106 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2107 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2108 if not is_derived and not self._match(TokenType.VALUES): 2109 return None 2110 2111 expressions = self._parse_csv(self._parse_value) 2112 2113 if is_derived: 2114 self._match_r_paren() 2115 2116 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2117 2118 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2119 if not self._match(TokenType.TABLE_SAMPLE): 2120 return None 2121 2122 method = self._parse_var() 2123 bucket_numerator = None 2124 bucket_denominator = None 2125 bucket_field = None 2126 percent = None 2127 rows = None 2128 size = None 2129 seed = None 2130 2131 self._match_l_paren() 2132 2133 if self._match(TokenType.BUCKET): 2134 bucket_numerator = self._parse_number() 2135 self._match(TokenType.OUT_OF) 2136 bucket_denominator = bucket_denominator = self._parse_number() 2137 self._match(TokenType.ON) 2138 bucket_field = self._parse_field() 2139 else: 2140 num = self._parse_number() 2141 2142 if self._match(TokenType.PERCENT): 2143 percent = num 2144 elif self._match(TokenType.ROWS): 2145 rows 
= num 2146 else: 2147 size = num 2148 2149 self._match_r_paren() 2150 2151 if self._match(TokenType.SEED): 2152 seed = self._parse_wrapped(self._parse_number) 2153 2154 return self.expression( 2155 exp.TableSample, 2156 method=method, 2157 bucket_numerator=bucket_numerator, 2158 bucket_denominator=bucket_denominator, 2159 bucket_field=bucket_field, 2160 percent=percent, 2161 rows=rows, 2162 size=size, 2163 seed=seed, 2164 ) 2165 2166 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2167 return list(iter(self._parse_pivot, None)) 2168 2169 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2170 index = self._index 2171 2172 if self._match(TokenType.PIVOT): 2173 unpivot = False 2174 elif self._match(TokenType.UNPIVOT): 2175 unpivot = True 2176 else: 2177 return None 2178 2179 expressions = [] 2180 field = None 2181 2182 if not self._match(TokenType.L_PAREN): 2183 self._retreat(index) 2184 return None 2185 2186 if unpivot: 2187 expressions = self._parse_csv(self._parse_column) 2188 else: 2189 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2190 2191 if not self._match(TokenType.FOR): 2192 self.raise_error("Expecting FOR") 2193 2194 value = self._parse_column() 2195 2196 if not self._match(TokenType.IN): 2197 self.raise_error("Expecting IN") 2198 2199 field = self._parse_in(value) 2200 2201 self._match_r_paren() 2202 2203 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2204 2205 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2206 pivot.set("alias", self._parse_table_alias()) 2207 2208 return pivot 2209 2210 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2211 if not skip_where_token and not self._match(TokenType.WHERE): 2212 return None 2213 2214 return self.expression( 2215 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2216 ) 2217 2218 def _parse_group(self, skip_group_by_token: bool = 
False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause.

        Collects plain expressions, GROUPING SETS, ROLLUP and CUBE elements
        into a single exp.Group node.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep consuming GROUP BY elements until an iteration produces nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # "WITH ROLLUP" / "WITH CUBE" (MySQL-style suffix) vs
            # "ROLLUP(...)" / "CUBE(...)" (function-style) forms.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... ), or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause; `skip_having_token` means HAVING was consumed."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause (window-function filtering)."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause wrapping `this`, or return `this` unchanged."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ordering term: expression [ASC|DESC] [NULLS FIRST|LAST].

        When NULLS FIRST/LAST is not given explicitly, nulls_first is derived
        from the dialect's null-ordering default (self.null_ordering) so the
        generated SQL sorts nulls the same way as the source dialect.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT <term>, TOP <number> (when `top`), or FETCH FIRST/NEXT.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TSQL allows TOP (n) with parentheses.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            # ROW/ROWS and ONLY are noise words here.
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <number> (or the "LIMIT x, y" comma form)."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains following `this` (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; ALL makes the operation non-distinct.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction with an optional alias (a SELECT-list item)."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, != chains."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS.

        A leading NOT negates the range predicate (e.g. NOT BETWEEN).
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IS predicate (IS was already consumed).

        Handles IS [NOT] DISTINCT FROM and IS [NOT] NULL/TRUE/FALSE.
        """
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS DISTINCT FROM is null-safe inequality; NOT flips it.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IN predicate: UNNEST(...), (list|subquery), or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery becomes IN (query); otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse the tail of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, %, ...)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, then a typed/AT TIME ZONE expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals or implicit casts of the form <TYPE> <column>."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this =
self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # "<type> <expr>" is treated as a cast of <expr> to <type>.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare type name with nothing following: re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, returning None (position restored) on failure.

        Handles parenthesized type args, ARRAY/MAP/STRUCT nesting via <...>,
        trailing [] array suffixes, and timestamp/time zone modifiers. When
        `check_func` is True, a type name followed by a string literal is
        treated as a function call rather than a type (e.g. DATE '...')
        and None is returned.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # Could still be a function call with the same name as a type.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # "INT[]" style array types; each extra "[]" adds a nesting level.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # "[" without a matching "]" pair: not a type expression.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Followed by a string literal: actually a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: either a bare type or "name [:] type"."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this =
self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference.

        Also handles column operators such as "::" casts, JSON arrows, dotted
        qualification (db.table.column) and bracket subscripts.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "expr::type" cast syntax.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # JSON-style operators take a literal key on the right.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: what looked like a column becomes the table,
                # the table becomes the db, and so on.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, ".5"-style number, or (...) group."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimals like ".5".
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                # Attach comments that preceded the "(".
                this.comments = comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, or return None if the tokens are not one.

        Args:
            functions: optional override for the name -> builder mapping;
                defaults to self.FUNCTIONS.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only keyword-functions like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening "(".
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...), ANY(SELECT ...), etc.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function names become exp.Anonymous nodes.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition (name plus type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: dotted name plus optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally kind-qualified (a.b)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        # Body of _parse_lambda: parse "(x, y) -> expr" / "x -> expr" lambdas,
        # falling back to DISTINCT aggregates or a plain select/expression.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        # Aggregate arguments may carry ORDER BY / LIMIT modifiers.
        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) for `this`.

        Returns `this` unchanged (position restored) if "(" is absent or the
        parentheses actually contain a SELECT.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type followed by constraints."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare name: not a column definition.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
2893 start = None 2894 increment = None 2895 2896 if self._match(TokenType.L_PAREN, advance=False): 2897 args = self._parse_wrapped_csv(self._parse_bitwise) 2898 start = seq_get(args, 0) 2899 increment = seq_get(args, 1) 2900 elif self._match_text_seq("START"): 2901 start = self._parse_bitwise() 2902 self._match_text_seq("INCREMENT") 2903 increment = self._parse_bitwise() 2904 2905 if start and increment: 2906 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2907 2908 return exp.AutoIncrementColumnConstraint() 2909 2910 def _parse_compress(self) -> exp.Expression: 2911 if self._match(TokenType.L_PAREN, advance=False): 2912 return self.expression( 2913 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2914 ) 2915 2916 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2917 2918 def _parse_generated_as_identity(self) -> exp.Expression: 2919 if self._match(TokenType.BY_DEFAULT): 2920 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2921 else: 2922 self._match_text_seq("ALWAYS") 2923 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2924 2925 self._match_text_seq("AS", "IDENTITY") 2926 if self._match(TokenType.L_PAREN): 2927 if self._match_text_seq("START", "WITH"): 2928 this.set("start", self._parse_bitwise()) 2929 if self._match_text_seq("INCREMENT", "BY"): 2930 this.set("increment", self._parse_bitwise()) 2931 if self._match_text_seq("MINVALUE"): 2932 this.set("minvalue", self._parse_bitwise()) 2933 if self._match_text_seq("MAXVALUE"): 2934 this.set("maxvalue", self._parse_bitwise()) 2935 2936 if self._match_text_seq("CYCLE"): 2937 this.set("cycle", True) 2938 elif self._match_text_seq("NO", "CYCLE"): 2939 this.set("cycle", False) 2940 2941 self._match_r_paren() 2942 2943 return this 2944 2945 def _parse_inline(self) -> t.Optional[exp.Expression]: 2946 self._match_text_seq("LENGTH") 2947 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NOT NULL, NOT CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column-level constraint, optionally named via CONSTRAINT."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT x ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may chain several constraint bodies.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a column constraint or with a column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return
self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options as raw strings.

        Handles ON <event> <action>, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY and MATCH FULL, in any order.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event token (DELETE/UPDATE/...) is captured as text.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause: target table, columns, and options."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # CASCADE / RESTRICT / other single-token actions.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint or with a column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed subscripts/slices on `this`, or struct/array literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Open-ended slice like [:expr].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize indices to the dialect's array base offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not
self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to consume chained subscripts like a[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ":<expr>" slice suffix on a bracket element."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (both simple and searched forms)."""
        ifs = []
        default = None

        # The operand of a simple CASE; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated instead of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` chooses Cast over TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR CHARACTER SET <charset> (MySQL).
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG and its dialect variants into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the two-argument call form.

        Args:
            haystack_first: in the call form, whether the haystack is the
                first argument (dialect-dependent).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table arguments of a join hint like BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
args.append(self._parse_bitwise()) 3268 if self._match(TokenType.FOR): 3269 args.append(self._parse_bitwise()) 3270 3271 this = exp.Substring.from_arg_list(args) 3272 self.validate_expression(this, args) 3273 3274 return this 3275 3276 def _parse_trim(self) -> exp.Expression: 3277 # https://www.w3resource.com/sql/character-functions/trim.php 3278 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3279 3280 position = None 3281 collation = None 3282 3283 if self._match_set(self.TRIM_TYPES): 3284 position = self._prev.text.upper() 3285 3286 expression = self._parse_term() 3287 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3288 this = self._parse_term() 3289 else: 3290 this = expression 3291 expression = None 3292 3293 if self._match(TokenType.COLLATE): 3294 collation = self._parse_term() 3295 3296 return self.expression( 3297 exp.Trim, 3298 this=this, 3299 position=position, 3300 expression=expression, 3301 collation=collation, 3302 ) 3303 3304 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3305 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3306 3307 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3308 return self._parse_window(self._parse_id_var(), alias=True) 3309 3310 def _parse_window( 3311 self, this: t.Optional[exp.Expression], alias: bool = False 3312 ) -> t.Optional[exp.Expression]: 3313 if self._match(TokenType.FILTER): 3314 where = self._parse_wrapped(self._parse_where) 3315 this = self.expression(exp.Filter, this=this, expression=where) 3316 3317 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3318 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3319 if self._match(TokenType.WITHIN_GROUP): 3320 order = self._parse_wrapped(self._parse_order) 3321 this = self.expression(exp.WithinGroup, this=this, expression=order) 3322 3323 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3324 # Some dialects choose to implement and some do not. 3325 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3326 3327 # There is some code above in _parse_lambda that handles 3328 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3329 3330 # The below changes handle 3331 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3332 3333 # Oracle allows both formats 3334 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3335 # and Snowflake chose to do the same for familiarity 3336 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3337 if self._match(TokenType.IGNORE_NULLS): 3338 this = self.expression(exp.IgnoreNulls, this=this) 3339 elif self._match(TokenType.RESPECT_NULLS): 3340 this = self.expression(exp.RespectNulls, this=this) 3341 3342 # bigquery select from window x AS (partition by ...) 
3343 if alias: 3344 self._match(TokenType.ALIAS) 3345 elif not self._match(TokenType.OVER): 3346 return this 3347 3348 if not self._match(TokenType.L_PAREN): 3349 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3350 3351 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3352 partition = self._parse_partition_by() 3353 order = self._parse_order() 3354 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3355 3356 if kind: 3357 self._match(TokenType.BETWEEN) 3358 start = self._parse_window_spec() 3359 self._match(TokenType.AND) 3360 end = self._parse_window_spec() 3361 3362 spec = self.expression( 3363 exp.WindowSpec, 3364 kind=kind, 3365 start=start["value"], 3366 start_side=start["side"], 3367 end=end["value"], 3368 end_side=end["side"], 3369 ) 3370 else: 3371 spec = None 3372 3373 self._match_r_paren() 3374 3375 return self.expression( 3376 exp.Window, 3377 this=this, 3378 partition_by=partition, 3379 order=order, 3380 spec=spec, 3381 alias=window_alias, 3382 ) 3383 3384 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3385 self._match(TokenType.BETWEEN) 3386 3387 return { 3388 "value": ( 3389 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3390 ) 3391 or self._parse_bitwise(), 3392 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3393 } 3394 3395 def _parse_alias( 3396 self, this: t.Optional[exp.Expression], explicit: bool = False 3397 ) -> t.Optional[exp.Expression]: 3398 any_token = self._match(TokenType.ALIAS) 3399 3400 if explicit and not any_token: 3401 return this 3402 3403 if self._match(TokenType.L_PAREN): 3404 aliases = self.expression( 3405 exp.Aliases, 3406 this=this, 3407 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3408 ) 3409 self._match_r_paren(aliases) 3410 return aliases 3411 3412 alias = self._parse_id_var(any_token) 3413 3414 if 
alias: 3415 return self.expression(exp.Alias, this=this, alias=alias) 3416 3417 return this 3418 3419 def _parse_id_var( 3420 self, 3421 any_token: bool = True, 3422 tokens: t.Optional[t.Collection[TokenType]] = None, 3423 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3424 ) -> t.Optional[exp.Expression]: 3425 identifier = self._parse_identifier() 3426 3427 if identifier: 3428 return identifier 3429 3430 prefix = "" 3431 3432 if prefix_tokens: 3433 while self._match_set(prefix_tokens): 3434 prefix += self._prev.text 3435 3436 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3437 quoted = self._prev.token_type == TokenType.STRING 3438 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3439 3440 return None 3441 3442 def _parse_string(self) -> t.Optional[exp.Expression]: 3443 if self._match(TokenType.STRING): 3444 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3445 return self._parse_placeholder() 3446 3447 def _parse_number(self) -> t.Optional[exp.Expression]: 3448 if self._match(TokenType.NUMBER): 3449 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3450 return self._parse_placeholder() 3451 3452 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3453 if self._match(TokenType.IDENTIFIER): 3454 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3455 return self._parse_placeholder() 3456 3457 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: 3458 if (any_token and self._advance_any()) or self._match(TokenType.VAR): 3459 return self.expression(exp.Var, this=self._prev.text) 3460 return self._parse_placeholder() 3461 3462 def _advance_any(self) -> t.Optional[Token]: 3463 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3464 self._advance() 3465 return self._prev 3466 return None 3467 3468 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3469 return self._parse_var() or 
self._parse_string() 3470 3471 def _parse_null(self) -> t.Optional[exp.Expression]: 3472 if self._match(TokenType.NULL): 3473 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3474 return None 3475 3476 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3477 if self._match(TokenType.TRUE): 3478 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3479 if self._match(TokenType.FALSE): 3480 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3481 return None 3482 3483 def _parse_star(self) -> t.Optional[exp.Expression]: 3484 if self._match(TokenType.STAR): 3485 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3486 return None 3487 3488 def _parse_parameter(self) -> exp.Expression: 3489 wrapped = self._match(TokenType.L_BRACE) 3490 this = self._parse_var() or self._parse_primary() 3491 self._match(TokenType.R_BRACE) 3492 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3493 3494 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3495 if self._match_set(self.PLACEHOLDER_PARSERS): 3496 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3497 if placeholder: 3498 return placeholder 3499 self._advance(-1) 3500 return None 3501 3502 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3503 if not self._match(TokenType.EXCEPT): 3504 return None 3505 if self._match(TokenType.L_PAREN, advance=False): 3506 return self._parse_wrapped_csv(self._parse_column) 3507 return self._parse_csv(self._parse_column) 3508 3509 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3510 if not self._match(TokenType.REPLACE): 3511 return None 3512 if self._match(TokenType.L_PAREN, advance=False): 3513 return self._parse_wrapped_csv(self._parse_expression) 3514 return self._parse_csv(self._parse_expression) 3515 3516 def _parse_csv( 3517 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3518 ) -> t.List[t.Optional[exp.Expression]]: 3519 
parse_result = parse_method() 3520 items = [parse_result] if parse_result is not None else [] 3521 3522 while self._match(sep): 3523 if parse_result and self._prev_comments: 3524 parse_result.comments = self._prev_comments 3525 3526 parse_result = parse_method() 3527 if parse_result is not None: 3528 items.append(parse_result) 3529 3530 return items 3531 3532 def _parse_tokens( 3533 self, parse_method: t.Callable, expressions: t.Dict 3534 ) -> t.Optional[exp.Expression]: 3535 this = parse_method() 3536 3537 while self._match_set(expressions): 3538 this = self.expression( 3539 expressions[self._prev.token_type], 3540 this=this, 3541 comments=self._prev_comments, 3542 expression=parse_method(), 3543 ) 3544 3545 return this 3546 3547 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3548 return self._parse_wrapped_csv(self._parse_id_var) 3549 3550 def _parse_wrapped_csv( 3551 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3552 ) -> t.List[t.Optional[exp.Expression]]: 3553 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3554 3555 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3556 self._match_l_paren() 3557 parse_result = parse_method() 3558 self._match_r_paren() 3559 return parse_result 3560 3561 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3562 return self._parse_select() or self._parse_expression() 3563 3564 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3565 return self._parse_set_operations( 3566 self._parse_select(nested=True, parse_subquery_alias=False) 3567 ) 3568 3569 def _parse_transaction(self) -> exp.Expression: 3570 this = None 3571 if self._match_texts(self.TRANSACTION_KIND): 3572 this = self._prev.text 3573 3574 self._match_texts({"TRANSACTION", "WORK"}) 3575 3576 modes = [] 3577 while True: 3578 mode = [] 3579 while self._match(TokenType.VAR): 3580 mode.append(self._prev.text) 3581 3582 if mode: 3583 modes.append(" ".join(mode)) 3584 if 
not self._match(TokenType.COMMA): 3585 break 3586 3587 return self.expression(exp.Transaction, this=this, modes=modes) 3588 3589 def _parse_commit_or_rollback(self) -> exp.Expression: 3590 chain = None 3591 savepoint = None 3592 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3593 3594 self._match_texts({"TRANSACTION", "WORK"}) 3595 3596 if self._match_text_seq("TO"): 3597 self._match_text_seq("SAVEPOINT") 3598 savepoint = self._parse_id_var() 3599 3600 if self._match(TokenType.AND): 3601 chain = not self._match_text_seq("NO") 3602 self._match_text_seq("CHAIN") 3603 3604 if is_rollback: 3605 return self.expression(exp.Rollback, savepoint=savepoint) 3606 return self.expression(exp.Commit, chain=chain) 3607 3608 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3609 if not self._match_text_seq("ADD"): 3610 return None 3611 3612 self._match(TokenType.COLUMN) 3613 exists_column = self._parse_exists(not_=True) 3614 expression = self._parse_column_def(self._parse_field(any_token=True)) 3615 3616 if expression: 3617 expression.set("exists", exists_column) 3618 3619 return expression 3620 3621 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3622 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3623 3624 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3625 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3626 return self.expression( 3627 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3628 ) 3629 3630 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3631 this = None 3632 kind = self._prev.token_type 3633 3634 if kind == TokenType.CONSTRAINT: 3635 this = self._parse_id_var() 3636 3637 if self._match_text_seq("CHECK"): 3638 expression = self._parse_wrapped(self._parse_conjunction) 3639 enforced = self._match_text_seq("ENFORCED") 3640 3641 return self.expression( 3642 exp.AddConstraint, this=this, 
expression=expression, enforced=enforced 3643 ) 3644 3645 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3646 expression = self._parse_foreign_key() 3647 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3648 expression = self._parse_primary_key() 3649 3650 return self.expression(exp.AddConstraint, this=this, expression=expression) 3651 3652 def _parse_alter(self) -> t.Optional[exp.Expression]: 3653 if not self._match(TokenType.TABLE): 3654 return self._parse_as_command(self._prev) 3655 3656 exists = self._parse_exists() 3657 this = self._parse_table(schema=True) 3658 3659 actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None 3660 3661 index = self._index 3662 if self._match(TokenType.DELETE): 3663 actions = [self.expression(exp.Delete, where=self._parse_where())] 3664 elif self._match_text_seq("ADD"): 3665 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3666 actions = self._parse_csv(self._parse_add_constraint) 3667 else: 3668 self._retreat(index) 3669 actions = self._parse_csv(self._parse_add_column) 3670 elif self._match_text_seq("DROP"): 3671 partition_exists = self._parse_exists() 3672 3673 if self._match(TokenType.PARTITION, advance=False): 3674 actions = self._parse_csv( 3675 lambda: self._parse_drop_partition(exists=partition_exists) 3676 ) 3677 else: 3678 self._retreat(index) 3679 actions = self._parse_csv(self._parse_drop_column) 3680 elif self._match_text_seq("RENAME", "TO"): 3681 actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3682 elif self._match_text_seq("ALTER"): 3683 self._match(TokenType.COLUMN) 3684 column = self._parse_field(any_token=True) 3685 3686 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3687 actions = self.expression(exp.AlterColumn, this=column, drop=True) 3688 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3689 actions = self.expression( 3690 exp.AlterColumn, this=column, default=self._parse_conjunction() 3691 
) 3692 else: 3693 self._match_text_seq("SET", "DATA") 3694 actions = self.expression( 3695 exp.AlterColumn, 3696 this=column, 3697 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3698 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3699 using=self._match(TokenType.USING) and self._parse_conjunction(), 3700 ) 3701 3702 actions = ensure_list(actions) 3703 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) 3704 3705 def _parse_show(self) -> t.Optional[exp.Expression]: 3706 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3707 if parser: 3708 return parser(self) 3709 self._advance() 3710 return self.expression(exp.Show, this=self._prev.text.upper()) 3711 3712 def _default_parse_set_item(self) -> exp.Expression: 3713 return self.expression( 3714 exp.SetItem, 3715 this=self._parse_statement(), 3716 ) 3717 3718 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3719 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3720 return parser(self) if parser else self._default_parse_set_item() 3721 3722 def _parse_merge(self) -> exp.Expression: 3723 self._match(TokenType.INTO) 3724 target = self._parse_table() 3725 3726 self._match(TokenType.USING) 3727 using = self._parse_table() 3728 3729 self._match(TokenType.ON) 3730 on = self._parse_conjunction() 3731 3732 whens = [] 3733 while self._match(TokenType.WHEN): 3734 this = self._parse_conjunction() 3735 self._match(TokenType.THEN) 3736 3737 if self._match(TokenType.INSERT): 3738 _this = self._parse_star() 3739 if _this: 3740 then = self.expression(exp.Insert, this=_this) 3741 else: 3742 then = self.expression( 3743 exp.Insert, 3744 this=self._parse_value(), 3745 expression=self._match(TokenType.VALUES) and self._parse_value(), 3746 ) 3747 elif self._match(TokenType.UPDATE): 3748 expressions = self._parse_star() 3749 if expressions: 3750 then = self.expression(exp.Update, expressions=expressions) 3751 else: 3752 
then = self.expression( 3753 exp.Update, 3754 expressions=self._match(TokenType.SET) 3755 and self._parse_csv(self._parse_equality), 3756 ) 3757 elif self._match(TokenType.DELETE): 3758 then = self.expression(exp.Var, this=self._prev.text) 3759 3760 whens.append(self.expression(exp.When, this=this, then=then)) 3761 3762 return self.expression( 3763 exp.Merge, 3764 this=target, 3765 using=using, 3766 on=on, 3767 expressions=whens, 3768 ) 3769 3770 def _parse_set(self) -> exp.Expression: 3771 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3772 3773 def _parse_as_command(self, start: Token) -> exp.Command: 3774 while self._curr: 3775 self._advance() 3776 return exp.Command(this=self._find_sql(start, self._prev)) 3777 3778 def _find_parser( 3779 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3780 ) -> t.Optional[t.Callable]: 3781 index = self._index 3782 this = [] 3783 while True: 3784 # The current token might be multiple words 3785 curr = self._curr.text.upper() 3786 key = curr.split(" ") 3787 this.append(curr) 3788 self._advance() 3789 result, trie = in_trie(trie, key) 3790 if result == 0: 3791 break 3792 if result == 2: 3793 subparser = parsers[" ".join(this)] 3794 return subparser 3795 self._retreat(index) 3796 return None 3797 3798 def _match(self, token_type, advance=True): 3799 if not self._curr: 3800 return None 3801 3802 if self._curr.token_type == token_type: 3803 if advance: 3804 self._advance() 3805 return True 3806 3807 return None 3808 3809 def _match_set(self, types, advance=True): 3810 if not self._curr: 3811 return None 3812 3813 if self._curr.token_type in types: 3814 if advance: 3815 self._advance() 3816 return True 3817 3818 return None 3819 3820 def _match_pair(self, token_type_a, token_type_b, advance=True): 3821 if not self._curr or not self._next: 3822 return None 3823 3824 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3825 if advance: 3826 self._advance(2) 3827 
return True 3828 3829 return None 3830 3831 def _match_l_paren(self, expression=None): 3832 if not self._match(TokenType.L_PAREN): 3833 self.raise_error("Expecting (") 3834 if expression and self._prev_comments: 3835 expression.comments = self._prev_comments 3836 3837 def _match_r_paren(self, expression=None): 3838 if not self._match(TokenType.R_PAREN): 3839 self.raise_error("Expecting )") 3840 if expression and self._prev_comments: 3841 expression.comments = self._prev_comments 3842 3843 def _match_texts(self, texts, advance=True): 3844 if self._curr and self._curr.text.upper() in texts: 3845 if advance: 3846 self._advance() 3847 return True 3848 return False 3849 3850 def _match_text_seq(self, *texts, advance=True): 3851 index = self._index 3852 for text in texts: 3853 if self._curr and self._curr.text.upper() == text: 3854 self._advance() 3855 else: 3856 self._retreat(index) 3857 return False 3858 3859 if not advance: 3860 self._retreat(index) 3861 3862 return True 3863 3864 def _replace_columns_with_dots(self, this): 3865 if isinstance(this, exp.Dot): 3866 exp.replace_children(this, self._replace_columns_with_dots) 3867 elif isinstance(this, exp.Column): 3868 exp.replace_children(this, self._replace_columns_with_dots) 3869 table = this.args.get("table") 3870 this = ( 3871 self.expression(exp.Dot, this=table, expression=this.this) 3872 if table 3873 else self.expression(exp.Var, this=this.name) 3874 ) 3875 elif isinstance(this, exp.Identifier): 3876 this = self.expression(exp.Var, this=this.name) 3877 return this 3878 3879 def _replace_lambda(self, node, lambda_variables): 3880 if isinstance(node, exp.Column): 3881 if node.name in lambda_variables: 3882 return node.this 3883 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Initialize parser configuration and reset the per-parse state."""
        # NOTE(review): the class docstring advertises ErrorLevel.RAISE and an
        # error_message_context of 50, but the actual fallbacks here are
        # IMMEDIATE and 100 — confirm which is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        # Number of characters of query context captured around an error location.
        self.error_message_context = error_message_context
        # Array index base (e.g. ARRAY[0] vs ARRAY[1]) applied to bracket subscripts.
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        # Whether a table alias is parsed after TABLESAMPLE instead of before it.
        self.alias_post_tablesample = alias_post_tablesample
        # Maximum number of error messages included in a raised ParseError.
        self.max_errors = max_errors
        # One of "nulls_are_small", "nulls_are_large", "nulls_are_last", or None.
        self.null_ordering = null_ordering
        # Resets the mutable per-parse state (defined elsewhere in the class).
        self.reset()
742 def parse( 743 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 744 ) -> t.List[t.Optional[exp.Expression]]: 745 """ 746 Parses a list of tokens and returns a list of syntax trees, one tree 747 per parsed SQL statement. 748 749 Args: 750 raw_tokens: the list of tokens. 751 sql: the original SQL string, used to produce helpful debug messages. 752 753 Returns: 754 The list of syntax trees. 755 """ 756 return self._parse( 757 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 758 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
760 def parse_into( 761 self, 762 expression_types: exp.IntoType, 763 raw_tokens: t.List[Token], 764 sql: t.Optional[str] = None, 765 ) -> t.List[t.Optional[exp.Expression]]: 766 """ 767 Parses a list of tokens into a given Expression type. If a collection of Expression 768 types is given instead, this method will try to parse the token list into each one 769 of them, stopping at the first for which the parsing succeeds. 770 771 Args: 772 expression_types: the expression type(s) to try and parse the token list into. 773 raw_tokens: the list of tokens. 774 sql: the original SQL string, used to produce helpful debug messages. 775 776 Returns: 777 The target Expression. 778 """ 779 errors = [] 780 for expression_type in ensure_collection(expression_types): 781 parser = self.EXPRESSION_PARSERS.get(expression_type) 782 if not parser: 783 raise TypeError(f"No parser registered for {expression_type}") 784 try: 785 return self._parse(parser, raw_tokens, sql) 786 except ParseError as e: 787 e.errors[0]["into_expression"] = expression_type 788 errors.append(e) 789 raise ParseError( 790 f"Failed to parse into {expression_types}", 791 errors=merge_errors(errors), 792 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
828 def check_errors(self) -> None: 829 """ 830 Logs or raises any found errors, depending on the chosen error level setting. 831 """ 832 if self.error_level == ErrorLevel.WARN: 833 for error in self.errors: 834 logger.error(str(error)) 835 elif self.error_level == ErrorLevel.RAISE and self.errors: 836 raise ParseError( 837 concat_messages(self.errors, self.max_errors), 838 errors=merge_errors(self.errors), 839 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token where the error was found; defaults to the current token,
                then the previous one, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        # Locate the offending token in the original SQL to build a context window
        # of `error_message_context` characters on either side of it.
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underlines the offending fragment in ANSI terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises on first error; other levels accumulate for check_errors().
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach — and consume, so they are not attached twice — any comments
        # captured from the most recently matched token.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        # Explicitly supplied comments take precedence over captured ones.
        if comments:
            instance.comments = comments
        # Records or raises errors for missing mandatory arguments, per error level.
        self.validate_expression(instance)
        return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
892 def validate_expression( 893 self, expression: exp.Expression, args: t.Optional[t.List] = None 894 ) -> None: 895 """ 896 Validates an already instantiated expression, making sure that all its mandatory arguments 897 are set. 898 899 Args: 900 expression: the expression to validate. 901 args: an optional list of items that was used to instantiate the expression, if it's a Func. 902 """ 903 if self.error_level == ErrorLevel.IGNORE: 904 return 905 906 for error_message in expression.error_messages(args): 907 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.