# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import (
    apply_index_offset,
    count_params,
    ensure_collection,
    ensure_list,
    seq_get,
)
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")


def parse_var_map(args):
    """Build an `exp.VarMap` from a flat [key1, value1, key2, value2, ...] argument list."""
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries once per Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Multi-word keys like "CHARACTER SET" are split so they can be matched token by token.
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Maps SQL function names to callables that build the corresponding Expression
    # from a parsed argument list.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Type tokens that can carry nested type parameters, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that introduce a subquery, e.g. EXISTS (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Tokens that may be used as identifiers/variable names even though they are keywords.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens excluded here would be ambiguous after a table reference (e.g. LEFT could start a join).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Keyword tokens that may also be parsed as function names when followed by parentheses.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator precedence tables: each maps an operator token to its Expression class.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda-style constructs: x -> expr (exp.Lambda) and x => expr (exp.Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column reference (casts, JSON extraction, ...).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Used by parse_into: maps a target Expression type to the parse method producing it.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Top-level statement dispatch, keyed on the statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression dispatch; each parser receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/membership predicates (BETWEEN, IN, LIKE, ...) applied to an already-parsed operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # DDL property dispatch, keyed on the (possibly multi-word) property keyword.
    # Several parsers inspect self._prev to recover NO/DUAL/DEFAULT prefixes already consumed.
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
    }

    # Column-constraint dispatch, keyed on the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (e.g. CAST(x AS type)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # SELECT-modifier clauses, in the order they are attached to a query.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Populated by dialect subclasses; the _Parser metaclass builds tries from these.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring says the default is ErrorLevel.RAISE, but the
        # code falls back to ErrorLevel.IMMEDIATE — confirm which one is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused for another SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies parse_method to each chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) statement chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Pending comments from the last consumed token are attached here; an explicit
        # `comments` argument takes precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanning the given start and end tokens."""
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        """Converts a token's (line, col) position into an absolute offset into self.sql."""
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward, refreshing the _curr/_next/_prev token views."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to an absolute index (used for backtracking)."""
        self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        """Wraps an unparsed statement as a raw exp.Command (keyword + rest of the text)."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: dispatch on the leading token, else fall back to an expression/SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> ...; falls back to a raw command if the kind is unknown."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS, returning a truthy value only if the full phrase matched."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parses CREATE statements (tables, views, functions, indexes, ...), including the
        dialect-specific modifiers and the various property locations around the schema."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        set_ = self._match(TokenType.SET)  # Teradata
        multiset = self._match_text_seq("MULTISET")  # Teradata
        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
        volatile = self._match(TokenType.VOLATILE)  # Teradata
        temporary = self._match(TokenType.TEMPORARY)
        transient = self._match_text_seq("TRANSIENT")
        external = self._match_text_seq("EXTERNAL")
        unique = self._match(TokenType.UNIQUE)
        materialized = self._match(TokenType.MATERIALIZED)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        data = None
        statistics = None
        no_primary_index = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                # NOTE(review): this appends a *list* into `expressions` (here and at the
                # merges below) — looks like it should be `extend`; confirm intended behavior.
                if properties and temp_properties:
                    properties.expressions.append(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.append(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.append(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # Teradata / CTAS options: WITH [NO] DATA [AND [NO] STATISTICS].
                if self._match_text_seq("WITH", "DATA"):
                    data = True
                elif self._match_text_seq("WITH", "NO", "DATA"):
                    data = False

                if self._match_text_seq("AND", "STATISTICS"):
                    statistics = True
                elif self._match_text_seq("AND", "NO", "STATISTICS"):
                    statistics = False

                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.append(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            set=set_,
            multiset=multiset,
            global_temporary=global_temporary,
            volatile=volatile,
            exists=exists,
            properties=properties,
            temporary=temporary,
            transient=transient,
            external=external,
            replace=replace,
            unique=unique,
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a single pre-schema property, consuming NO/DUAL/DEFAULT prefixes first
        so the dispatched parser can inspect them via self._prev."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): assumes self._curr is not None here — verify callers guarantee a token.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def
_parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single CREATE-level property; returns None when nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; the key may be an identifier or a string.
        # advance=False: only peek here, the tokens are consumed below.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parse `[=|AS] <value>` and wrap the parsed value in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
        """Collect consecutive properties into one exp.Properties node (None if none found).

        When `before` is truthy, properties are parsed in the pre-schema (Teradata)
        position via _parse_property_before.
        """
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A property parser may return a single node or a collection of nodes.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        """Parse Teradata [NO] FALLBACK [PROTECTION]."""
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property (or property list) following WITH in a CREATE statement."""
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop [, ...]) — a parenthesized, comma-separated property list.
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse MySQL DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)  # the '@' separator
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse Teradata WITH JOURNAL TABLE = <table>."""
        self._match_text_seq("WITH", "JOURNAL", "TABLE")
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        """Parse Teradata [NO] LOG."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no: bool = False, dual: bool = False) -> exp.Expression:
        """Parse Teradata [NO|DUAL] [BEFORE] JOURNAL."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no: bool = False, dual: bool = False, local=None) -> exp.Expression:
        """Parse Teradata [NOT] [LOCAL] AFTER JOURNAL."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parse Teradata CHECKSUM = ON|OFF|DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default =
self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Parse Teradata FREESPACE = <number> [PERCENT]."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        """Parse Teradata MERGEBLOCKRATIO: `= <number> [PERCENT]` or the NO/DEFAULT forms."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse Teradata DATABLOCKSIZE: DEFAULT, MIN/MAX, or `= <size> [units]`."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse Teradata BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if
self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse Teradata WITH [NO|CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse Teradata LOCKING <kind> [<name>] FOR|IN <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry an object name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr> [, ...]; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse the PARTITIONED BY property value (schema or bracketed field)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_distkey(self) -> exp.Expression:
        """Parse Redshift DISTKEY(<column>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                # INCLUDING/EXCLUDING must be followed by an option name.
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse Redshift [COMPOUND] SORTKEY(<columns>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE (...), or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # BigQuery-style TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not
self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        """Parse DESCRIBE [<kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse INSERT [OVERWRITE] [OR <alt>] INTO <table>|DIRECTORY ... <select>."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. SQLite INSERT OR REPLACE / IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse ROW FORMAT ... when the ROW token was already consumed by the caller."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse Hive ROW FORMAT SERDE '<serde>' or ROW FORMAT DELIMITED [clauses]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED",
"BY"): 1506 kwargs["fields"] = self._parse_string() 1507 if self._match_text_seq("ESCAPED", "BY"): 1508 kwargs["escaped"] = self._parse_string() 1509 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1510 kwargs["collection_items"] = self._parse_string() 1511 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1512 kwargs["map_keys"] = self._parse_string() 1513 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1514 kwargs["lines"] = self._parse_string() 1515 if self._match_text_seq("NULL", "DEFINED", "AS"): 1516 kwargs["null"] = self._parse_string() 1517 1518 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1519 1520 def _parse_load_data(self) -> exp.Expression: 1521 local = self._match(TokenType.LOCAL) 1522 self._match_text_seq("INPATH") 1523 inpath = self._parse_string() 1524 overwrite = self._match(TokenType.OVERWRITE) 1525 self._match_pair(TokenType.INTO, TokenType.TABLE) 1526 1527 return self.expression( 1528 exp.LoadData, 1529 this=self._parse_table(schema=True), 1530 local=local, 1531 overwrite=overwrite, 1532 inpath=inpath, 1533 partition=self._parse_partition(), 1534 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1535 serde=self._match_text_seq("SERDE") and self._parse_string(), 1536 ) 1537 1538 def _parse_delete(self) -> exp.Expression: 1539 self._match(TokenType.FROM) 1540 1541 return self.expression( 1542 exp.Delete, 1543 this=self._parse_table(schema=True), 1544 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1545 where=self._parse_where(), 1546 ) 1547 1548 def _parse_update(self) -> exp.Expression: 1549 return self.expression( 1550 exp.Update, 1551 **{ # type: ignore 1552 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1553 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1554 "from": self._parse_from(), 1555 "where": self._parse_where(), 1556 }, 1557 ) 1558 1559 def 
_parse_uncache(self) -> exp.Expression:
        """Parse Spark UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse Spark CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            # Single key/value pair stored as a flat [key, value] list.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse PARTITION (<expr> [, ...]); returns None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse a single VALUES row: parenthesized tuple or one bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized subquery, or VALUES.

        Args:
            nested: allow a parenthesized nested select.
            table: allow (and prefer) a table expression inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # reached only when error level tolerates the failure

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)  # e.g. T-SQL's SELECT TOP n
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse WITH [RECURSIVE] <cte> [, ...]."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue on a comma or (lenient) on a repeated WITH keyword.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> [(<column aliases>)]; returns None when neither is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack when the parenthesized list turned out to be empty.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression],
parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in a Subquery, parsing any pivots and (optionally) an alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach laterals, joins and trailing clauses (WHERE, GROUP BY, ...) to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma join: add the next table to the FROM list.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse an optimizer hint block /*+ ... */ following SELECT."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        """Parse FROM <table> [, ...]; returns None when FROM is absent."""
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1799 if not self._match(TokenType.MATCH_RECOGNIZE): 1800 return None 1801 self._match_l_paren() 1802 1803 partition = self._parse_partition_by() 1804 order = self._parse_order() 1805 measures = ( 1806 self._parse_alias(self._parse_conjunction()) 1807 if self._match_text_seq("MEASURES") 1808 else None 1809 ) 1810 1811 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1812 rows = exp.Var(this="ONE ROW PER MATCH") 1813 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1814 text = "ALL ROWS PER MATCH" 1815 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1816 text += f" SHOW EMPTY MATCHES" 1817 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1818 text += f" OMIT EMPTY MATCHES" 1819 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1820 text += f" WITH UNMATCHED ROWS" 1821 rows = exp.Var(this=text) 1822 else: 1823 rows = None 1824 1825 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1826 text = "AFTER MATCH SKIP" 1827 if self._match_text_seq("PAST", "LAST", "ROW"): 1828 text += f" PAST LAST ROW" 1829 elif self._match_text_seq("TO", "NEXT", "ROW"): 1830 text += f" TO NEXT ROW" 1831 elif self._match_text_seq("TO", "FIRST"): 1832 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1833 elif self._match_text_seq("TO", "LAST"): 1834 text += f" TO LAST {self._advance_any().text}" # type: ignore 1835 after = exp.Var(this=text) 1836 else: 1837 after = None 1838 1839 if self._match_text_seq("PATTERN"): 1840 self._match_l_paren() 1841 1842 if not self._curr: 1843 self.raise_error("Expecting )", self._curr) 1844 1845 paren = 1 1846 start = self._curr 1847 1848 while self._curr and paren > 0: 1849 if self._curr.token_type == TokenType.L_PAREN: 1850 paren += 1 1851 if self._curr.token_type == TokenType.R_PAREN: 1852 paren -= 1 1853 end = self._prev 1854 self._advance() 1855 if paren > 0: 1856 self.raise_error("Expecting )", self._curr) 1857 pattern = exp.Var(this=self._find_sql(start, end)) 1858 else: 1859 pattern = None 1860 1861 define = 
(
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] ... or OUTER/CROSS APPLY (APPLY is returned as a Join)."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # Hive LATERAL VIEW: alias names the generated table, optionally its columns.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY is modeled as a join: CROSS APPLY has no side, OUTER APPLY is LEFT.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional NATURAL, side (LEFT/RIGHT/FULL) and kind (INNER/OUTER/...) tokens."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse one JOIN clause: side/kind, target table, then ON or USING."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse the body of CREATE INDEX: <name> ON [TABLE] <table> <expression>."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse a Teradata-style [UNIQUE] [PRIMARY] [AMP] INDEX inside CREATE TABLE."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly dot-qualified table name into catalog/db/table parts."""
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or
self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: what was the table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause item: lateral, unnest, VALUES, subquery or a plain table."""
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias (see alias_post_tablesample).
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # T-SQL style table hints: WITH (NOLOCK, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(<columns>) [WITH ORDINALITY] [alias] [WITH OFFSET ...]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions =
self._parse_wrapped_csv(self._parse_column) 2064 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2065 alias = self._parse_table_alias() 2066 2067 if alias and self.unnest_column_only: 2068 if alias.args.get("columns"): 2069 self.raise_error("Unexpected extra column alias in unnest.") 2070 alias.set("columns", [alias.this]) 2071 alias.set("this", None) 2072 2073 offset = None 2074 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2075 self._match(TokenType.ALIAS) 2076 offset = self._parse_conjunction() 2077 2078 return self.expression( 2079 exp.Unnest, 2080 expressions=expressions, 2081 ordinality=ordinality, 2082 alias=alias, 2083 offset=offset, 2084 ) 2085 2086 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2087 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2088 if not is_derived and not self._match(TokenType.VALUES): 2089 return None 2090 2091 expressions = self._parse_csv(self._parse_value) 2092 2093 if is_derived: 2094 self._match_r_paren() 2095 2096 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2097 2098 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2099 if not self._match(TokenType.TABLE_SAMPLE): 2100 return None 2101 2102 method = self._parse_var() 2103 bucket_numerator = None 2104 bucket_denominator = None 2105 bucket_field = None 2106 percent = None 2107 rows = None 2108 size = None 2109 seed = None 2110 2111 self._match_l_paren() 2112 2113 if self._match(TokenType.BUCKET): 2114 bucket_numerator = self._parse_number() 2115 self._match(TokenType.OUT_OF) 2116 bucket_denominator = bucket_denominator = self._parse_number() 2117 self._match(TokenType.ON) 2118 bucket_field = self._parse_field() 2119 else: 2120 num = self._parse_number() 2121 2122 if self._match(TokenType.PERCENT): 2123 percent = num 2124 elif self._match(TokenType.ROWS): 2125 rows = num 2126 else: 2127 size = num 2128 2129 self._match_r_paren() 2130 
2131 if self._match(TokenType.SEED): 2132 seed = self._parse_wrapped(self._parse_number) 2133 2134 return self.expression( 2135 exp.TableSample, 2136 method=method, 2137 bucket_numerator=bucket_numerator, 2138 bucket_denominator=bucket_denominator, 2139 bucket_field=bucket_field, 2140 percent=percent, 2141 rows=rows, 2142 size=size, 2143 seed=seed, 2144 ) 2145 2146 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2147 return list(iter(self._parse_pivot, None)) 2148 2149 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2150 index = self._index 2151 2152 if self._match(TokenType.PIVOT): 2153 unpivot = False 2154 elif self._match(TokenType.UNPIVOT): 2155 unpivot = True 2156 else: 2157 return None 2158 2159 expressions = [] 2160 field = None 2161 2162 if not self._match(TokenType.L_PAREN): 2163 self._retreat(index) 2164 return None 2165 2166 if unpivot: 2167 expressions = self._parse_csv(self._parse_column) 2168 else: 2169 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2170 2171 if not self._match(TokenType.FOR): 2172 self.raise_error("Expecting FOR") 2173 2174 value = self._parse_column() 2175 2176 if not self._match(TokenType.IN): 2177 self.raise_error("Expecting IN") 2178 2179 field = self._parse_in(value) 2180 2181 self._match_r_paren() 2182 2183 return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2184 2185 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2186 if not skip_where_token and not self._match(TokenType.WHERE): 2187 return None 2188 2189 return self.expression( 2190 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2191 ) 2192 2193 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2194 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2195 return None 2196 2197 elements = defaultdict(list) 2198 2199 while True: 2200 expressions = 
self._parse_csv(self._parse_conjunction) 2201 if expressions: 2202 elements["expressions"].extend(expressions) 2203 2204 grouping_sets = self._parse_grouping_sets() 2205 if grouping_sets: 2206 elements["grouping_sets"].extend(grouping_sets) 2207 2208 rollup = None 2209 cube = None 2210 2211 with_ = self._match(TokenType.WITH) 2212 if self._match(TokenType.ROLLUP): 2213 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2214 elements["rollup"].extend(ensure_list(rollup)) 2215 2216 if self._match(TokenType.CUBE): 2217 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2218 elements["cube"].extend(ensure_list(cube)) 2219 2220 if not (expressions or grouping_sets or rollup or cube): 2221 break 2222 2223 return self.expression(exp.Group, **elements) # type: ignore 2224 2225 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2226 if not self._match(TokenType.GROUPING_SETS): 2227 return None 2228 2229 return self._parse_wrapped_csv(self._parse_grouping_set) 2230 2231 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2232 if self._match(TokenType.L_PAREN): 2233 grouping_set = self._parse_csv(self._parse_column) 2234 self._match_r_paren() 2235 return self.expression(exp.Tuple, expressions=grouping_set) 2236 2237 return self._parse_column() 2238 2239 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2240 if not skip_having_token and not self._match(TokenType.HAVING): 2241 return None 2242 return self.expression(exp.Having, this=self._parse_conjunction()) 2243 2244 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2245 if not self._match(TokenType.QUALIFY): 2246 return None 2247 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2248 2249 def _parse_order( 2250 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2251 ) -> t.Optional[exp.Expression]: 2252 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2253 return 
this 2254 2255 return self.expression( 2256 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2257 ) 2258 2259 def _parse_sort( 2260 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2261 ) -> t.Optional[exp.Expression]: 2262 if not self._match(token_type): 2263 return None 2264 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2265 2266 def _parse_ordered(self) -> exp.Expression: 2267 this = self._parse_conjunction() 2268 self._match(TokenType.ASC) 2269 is_desc = self._match(TokenType.DESC) 2270 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2271 is_nulls_last = self._match(TokenType.NULLS_LAST) 2272 desc = is_desc or False 2273 asc = not desc 2274 nulls_first = is_nulls_first or False 2275 explicitly_null_ordered = is_nulls_first or is_nulls_last 2276 if ( 2277 not explicitly_null_ordered 2278 and ( 2279 (asc and self.null_ordering == "nulls_are_small") 2280 or (desc and self.null_ordering != "nulls_are_small") 2281 ) 2282 and self.null_ordering != "nulls_are_last" 2283 ): 2284 nulls_first = True 2285 2286 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2287 2288 def _parse_limit( 2289 self, this: t.Optional[exp.Expression] = None, top: bool = False 2290 ) -> t.Optional[exp.Expression]: 2291 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2292 limit_paren = self._match(TokenType.L_PAREN) 2293 limit_exp = self.expression( 2294 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2295 ) 2296 2297 if limit_paren: 2298 self._match_r_paren() 2299 2300 return limit_exp 2301 2302 if self._match(TokenType.FETCH): 2303 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2304 direction = self._prev.text if direction else "FIRST" 2305 count = self._parse_number() 2306 self._match_set((TokenType.ROW, TokenType.ROWS)) 2307 self._match(TokenType.ONLY) 2308 return self.expression(exp.Fetch, direction=direction, 
count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parse OFFSET <n> [ROW|ROWS]; also accepts a comma (LIMIT x, y form).
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        # Row-locking clauses: FOR UPDATE / FOR SHARE.
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION / EXCEPT / INTERSECT clauses onto `this` (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # A projection expression: a conjunction with an optional alias.
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS, ISNULL/NOTNULL, IS)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is modelled as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        # IS [NOT] DISTINCT FROM -> null-safe (in)equality; otherwise IS [NOT] NULL/TRUE/FALSE.
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM means null-safe equality, hence EQ when negated.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        # IN may take UNNEST(...), a parenthesized subquery or value list, or a bare field.
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # A lone subquery becomes IN (SELECT ...) rather than a value list.
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        low = self._parse_bitwise()
self._match(TokenType.AND) 2417 high = self._parse_bitwise() 2418 return self.expression(exp.Between, this=this, low=low, high=high) 2419 2420 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2421 if not self._match(TokenType.ESCAPE): 2422 return this 2423 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2424 2425 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2426 this = self._parse_term() 2427 2428 while True: 2429 if self._match_set(self.BITWISE): 2430 this = self.expression( 2431 self.BITWISE[self._prev.token_type], 2432 this=this, 2433 expression=self._parse_term(), 2434 ) 2435 elif self._match_pair(TokenType.LT, TokenType.LT): 2436 this = self.expression( 2437 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2438 ) 2439 elif self._match_pair(TokenType.GT, TokenType.GT): 2440 this = self.expression( 2441 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2442 ) 2443 else: 2444 break 2445 2446 return this 2447 2448 def _parse_term(self) -> t.Optional[exp.Expression]: 2449 return self._parse_tokens(self._parse_factor, self.TERM) 2450 2451 def _parse_factor(self) -> t.Optional[exp.Expression]: 2452 return self._parse_tokens(self._parse_unary, self.FACTOR) 2453 2454 def _parse_unary(self) -> t.Optional[exp.Expression]: 2455 if self._match_set(self.UNARY_PARSERS): 2456 return self.UNARY_PARSERS[self._prev.token_type](self) 2457 return self._parse_at_time_zone(self._parse_type()) 2458 2459 def _parse_type(self) -> t.Optional[exp.Expression]: 2460 if self._match(TokenType.INTERVAL): 2461 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2462 2463 index = self._index 2464 type_token = self._parse_types(check_func=True) 2465 this = self._parse_column() 2466 2467 if type_token: 2468 if this and not isinstance(this, exp.Star): 2469 return self.expression(exp.Cast, this=this, to=type_token) 2470 if not 
type_token.args.get("expressions"): 2471 self._retreat(index) 2472 return self._parse_column() 2473 return type_token 2474 2475 return this 2476 2477 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2478 index = self._index 2479 2480 prefix = self._match_text_seq("SYSUDTLIB", ".") 2481 2482 if not self._match_set(self.TYPE_TOKENS): 2483 return None 2484 2485 type_token = self._prev.token_type 2486 2487 if type_token == TokenType.PSEUDO_TYPE: 2488 return self.expression(exp.PseudoType, this=self._prev.text) 2489 2490 nested = type_token in self.NESTED_TYPE_TOKENS 2491 is_struct = type_token == TokenType.STRUCT 2492 expressions = None 2493 maybe_func = False 2494 2495 if self._match(TokenType.L_PAREN): 2496 if is_struct: 2497 expressions = self._parse_csv(self._parse_struct_kwargs) 2498 elif nested: 2499 expressions = self._parse_csv(self._parse_types) 2500 else: 2501 expressions = self._parse_csv(self._parse_conjunction) 2502 2503 if not expressions: 2504 self._retreat(index) 2505 return None 2506 2507 self._match_r_paren() 2508 maybe_func = True 2509 2510 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2511 this = exp.DataType( 2512 this=exp.DataType.Type.ARRAY, 2513 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2514 nested=True, 2515 ) 2516 2517 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2518 this = exp.DataType( 2519 this=exp.DataType.Type.ARRAY, 2520 expressions=[this], 2521 nested=True, 2522 ) 2523 2524 return this 2525 2526 if self._match(TokenType.L_BRACKET): 2527 self._retreat(index) 2528 return None 2529 2530 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2531 if nested and self._match(TokenType.LT): 2532 if is_struct: 2533 expressions = self._parse_csv(self._parse_struct_kwargs) 2534 else: 2535 expressions = self._parse_csv(self._parse_types) 2536 2537 if not self._match(TokenType.GT): 2538 self.raise_error("Expecting >") 2539 
2540 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2541 values = self._parse_csv(self._parse_conjunction) 2542 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2543 2544 value: t.Optional[exp.Expression] = None 2545 if type_token in self.TIMESTAMPS: 2546 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2547 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2548 elif ( 2549 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2550 ): 2551 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2552 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2553 if type_token == TokenType.TIME: 2554 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2555 else: 2556 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2557 2558 maybe_func = maybe_func and value is None 2559 2560 if value is None: 2561 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2562 elif type_token == TokenType.INTERVAL: 2563 value = self.expression(exp.Interval, unit=self._parse_var()) 2564 2565 if maybe_func and check_func: 2566 index2 = self._index 2567 peek = self._parse_string() 2568 2569 if not peek: 2570 self._retreat(index) 2571 return None 2572 2573 self._retreat(index2) 2574 2575 if value: 2576 return value 2577 2578 return exp.DataType( 2579 this=exp.DataType.Type[type_token.value.upper()], 2580 expressions=expressions, 2581 nested=nested, 2582 values=values, 2583 prefix=prefix, 2584 ) 2585 2586 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2587 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2588 return self._parse_types() 2589 2590 this = self._parse_id_var() 2591 self._match(TokenType.COLON) 2592 data_type = self._parse_types() 2593 2594 if not data_type: 2595 return None 2596 return self.expression(exp.StructKwarg, this=this, 
expression=data_type) 2597 2598 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2599 if not self._match(TokenType.AT_TIME_ZONE): 2600 return this 2601 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2602 2603 def _parse_column(self) -> t.Optional[exp.Expression]: 2604 this = self._parse_field() 2605 if isinstance(this, exp.Identifier): 2606 this = self.expression(exp.Column, this=this) 2607 elif not this: 2608 return self._parse_bracket(this) 2609 this = self._parse_bracket(this) 2610 2611 while self._match_set(self.COLUMN_OPERATORS): 2612 op_token = self._prev.token_type 2613 op = self.COLUMN_OPERATORS.get(op_token) 2614 2615 if op_token == TokenType.DCOLON: 2616 field = self._parse_types() 2617 if not field: 2618 self.raise_error("Expected type") 2619 elif op: 2620 self._advance() 2621 value = self._prev.text 2622 field = ( 2623 exp.Literal.number(value) 2624 if self._prev.token_type == TokenType.NUMBER 2625 else exp.Literal.string(value) 2626 ) 2627 else: 2628 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2629 2630 if isinstance(field, exp.Func): 2631 # bigquery allows function calls like x.y.count(...) 2632 # SAFE.SUBSTR(...) 
2633 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2634 this = self._replace_columns_with_dots(this) 2635 2636 if op: 2637 this = op(self, this, field) 2638 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2639 this = self.expression( 2640 exp.Column, 2641 this=field, 2642 table=this.this, 2643 db=this.args.get("table"), 2644 catalog=this.args.get("db"), 2645 ) 2646 else: 2647 this = self.expression(exp.Dot, this=this, expression=field) 2648 this = self._parse_bracket(this) 2649 2650 return this 2651 2652 def _parse_primary(self) -> t.Optional[exp.Expression]: 2653 if self._match_set(self.PRIMARY_PARSERS): 2654 token_type = self._prev.token_type 2655 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2656 2657 if token_type == TokenType.STRING: 2658 expressions = [primary] 2659 while self._match(TokenType.STRING): 2660 expressions.append(exp.Literal.string(self._prev.text)) 2661 if len(expressions) > 1: 2662 return self.expression(exp.Concat, expressions=expressions) 2663 return primary 2664 2665 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2666 return exp.Literal.number(f"0.{self._prev.text}") 2667 2668 if self._match(TokenType.L_PAREN): 2669 comments = self._prev_comments 2670 query = self._parse_select() 2671 2672 if query: 2673 expressions = [query] 2674 else: 2675 expressions = self._parse_csv( 2676 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2677 ) 2678 2679 this = seq_get(expressions, 0) 2680 self._parse_query_modifiers(this) 2681 self._match_r_paren() 2682 2683 if isinstance(this, exp.Subqueryable): 2684 this = self._parse_set_operations( 2685 self._parse_subquery(this=this, parse_alias=False) 2686 ) 2687 elif len(expressions) > 1: 2688 this = self.expression(exp.Tuple, expressions=expressions) 2689 else: 2690 this = self.expression(exp.Paren, this=this) 2691 2692 if this and comments: 2693 this.comments = comments 2694 2695 return this 
2696 2697 return None 2698 2699 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2700 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2701 2702 def _parse_function( 2703 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2704 ) -> t.Optional[exp.Expression]: 2705 if not self._curr: 2706 return None 2707 2708 token_type = self._curr.token_type 2709 2710 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2711 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2712 2713 if not self._next or self._next.token_type != TokenType.L_PAREN: 2714 if token_type in self.NO_PAREN_FUNCTIONS: 2715 self._advance() 2716 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2717 2718 return None 2719 2720 if token_type not in self.FUNC_TOKENS: 2721 return None 2722 2723 this = self._curr.text 2724 upper = this.upper() 2725 self._advance(2) 2726 2727 parser = self.FUNCTION_PARSERS.get(upper) 2728 2729 if parser: 2730 this = parser(self) 2731 else: 2732 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2733 2734 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2735 this = self.expression(subquery_predicate, this=self._parse_select()) 2736 self._match_r_paren() 2737 return this 2738 2739 if functions is None: 2740 functions = self.FUNCTIONS 2741 2742 function = functions.get(upper) 2743 args = self._parse_csv(self._parse_lambda) 2744 2745 if function: 2746 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2747 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
2748 if count_params(function) == 2: 2749 params = None 2750 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2751 params = self._parse_csv(self._parse_lambda) 2752 2753 this = function(args, params) 2754 else: 2755 this = function(args) 2756 2757 self.validate_expression(this, args) 2758 else: 2759 this = self.expression(exp.Anonymous, this=this, expressions=args) 2760 2761 self._match_r_paren(this) 2762 return self._parse_window(this) 2763 2764 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2765 return self._parse_column_def(self._parse_id_var()) 2766 2767 def _parse_user_defined_function( 2768 self, kind: t.Optional[TokenType] = None 2769 ) -> t.Optional[exp.Expression]: 2770 this = self._parse_id_var() 2771 2772 while self._match(TokenType.DOT): 2773 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2774 2775 if not self._match(TokenType.L_PAREN): 2776 return this 2777 2778 expressions = self._parse_csv(self._parse_function_parameter) 2779 self._match_r_paren() 2780 return self.expression( 2781 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2782 ) 2783 2784 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2785 literal = self._parse_primary() 2786 if literal: 2787 return self.expression(exp.Introducer, this=token.text, expression=literal) 2788 2789 return self.expression(exp.Identifier, this=token.text) 2790 2791 def _parse_national(self, token: Token) -> exp.Expression: 2792 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2793 2794 def _parse_session_parameter(self) -> exp.Expression: 2795 kind = None 2796 this = self._parse_id_var() or self._parse_primary() 2797 2798 if this and self._match(TokenType.DOT): 2799 kind = this.name 2800 this = self._parse_var() or self._parse_primary() 2801 2802 return self.expression(exp.SessionParameter, this=this, kind=kind) 2803 2804 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2805 
index = self._index 2806 2807 if self._match(TokenType.L_PAREN): 2808 expressions = self._parse_csv(self._parse_id_var) 2809 2810 if not self._match(TokenType.R_PAREN): 2811 self._retreat(index) 2812 else: 2813 expressions = [self._parse_id_var()] 2814 2815 if self._match_set(self.LAMBDAS): 2816 return self.LAMBDAS[self._prev.token_type](self, expressions) 2817 2818 self._retreat(index) 2819 2820 this: t.Optional[exp.Expression] 2821 2822 if self._match(TokenType.DISTINCT): 2823 this = self.expression( 2824 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2825 ) 2826 else: 2827 this = self._parse_select_or_expression() 2828 2829 if self._match(TokenType.IGNORE_NULLS): 2830 this = self.expression(exp.IgnoreNulls, this=this) 2831 else: 2832 self._match(TokenType.RESPECT_NULLS) 2833 2834 return self._parse_limit(self._parse_order(this)) 2835 2836 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2837 index = self._index 2838 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2839 self._retreat(index) 2840 return this 2841 2842 args = self._parse_csv( 2843 lambda: self._parse_constraint() 2844 or self._parse_column_def(self._parse_field(any_token=True)) 2845 ) 2846 self._match_r_paren() 2847 return self.expression(exp.Schema, this=this, expressions=args) 2848 2849 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2850 kind = self._parse_types() 2851 2852 if self._match_text_seq("FOR", "ORDINALITY"): 2853 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2854 2855 constraints = [] 2856 while True: 2857 constraint = self._parse_column_constraint() 2858 if not constraint: 2859 break 2860 constraints.append(constraint) 2861 2862 if not kind and not constraints: 2863 return this 2864 2865 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2866 2867 def _parse_auto_increment(self) -> exp.Expression: 
2868 start = None 2869 increment = None 2870 2871 if self._match(TokenType.L_PAREN, advance=False): 2872 args = self._parse_wrapped_csv(self._parse_bitwise) 2873 start = seq_get(args, 0) 2874 increment = seq_get(args, 1) 2875 elif self._match_text_seq("START"): 2876 start = self._parse_bitwise() 2877 self._match_text_seq("INCREMENT") 2878 increment = self._parse_bitwise() 2879 2880 if start and increment: 2881 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2882 2883 return exp.AutoIncrementColumnConstraint() 2884 2885 def _parse_generated_as_identity(self) -> exp.Expression: 2886 if self._match(TokenType.BY_DEFAULT): 2887 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2888 else: 2889 self._match_text_seq("ALWAYS") 2890 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2891 2892 self._match_text_seq("AS", "IDENTITY") 2893 if self._match(TokenType.L_PAREN): 2894 if self._match_text_seq("START", "WITH"): 2895 this.set("start", self._parse_bitwise()) 2896 if self._match_text_seq("INCREMENT", "BY"): 2897 this.set("increment", self._parse_bitwise()) 2898 if self._match_text_seq("MINVALUE"): 2899 this.set("minvalue", self._parse_bitwise()) 2900 if self._match_text_seq("MAXVALUE"): 2901 this.set("maxvalue", self._parse_bitwise()) 2902 2903 if self._match_text_seq("CYCLE"): 2904 this.set("cycle", True) 2905 elif self._match_text_seq("NO", "CYCLE"): 2906 this.set("cycle", False) 2907 2908 self._match_r_paren() 2909 2910 return this 2911 2912 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2913 if self._match_text_seq("NULL"): 2914 return self.expression(exp.NotNullColumnConstraint) 2915 if self._match_text_seq("CASESPECIFIC"): 2916 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2917 return None 2918 2919 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2920 this = self._parse_references() 2921 if this: 2922 return this 2923 2924 if 
self._match(TokenType.CONSTRAINT): 2925 this = self._parse_id_var() 2926 2927 if self._match_texts(self.CONSTRAINT_PARSERS): 2928 return self.expression( 2929 exp.ColumnConstraint, 2930 this=this, 2931 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 2932 ) 2933 2934 return this 2935 2936 def _parse_constraint(self) -> t.Optional[exp.Expression]: 2937 if not self._match(TokenType.CONSTRAINT): 2938 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 2939 2940 this = self._parse_id_var() 2941 expressions = [] 2942 2943 while True: 2944 constraint = self._parse_unnamed_constraint() or self._parse_function() 2945 if not constraint: 2946 break 2947 expressions.append(constraint) 2948 2949 return self.expression(exp.Constraint, this=this, expressions=expressions) 2950 2951 def _parse_unnamed_constraint( 2952 self, constraints: t.Optional[t.Collection[str]] = None 2953 ) -> t.Optional[exp.Expression]: 2954 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 2955 return None 2956 2957 constraint = self._prev.text.upper() 2958 if constraint not in self.CONSTRAINT_PARSERS: 2959 self.raise_error(f"No parser found for schema constraint {constraint}.") 2960 2961 return self.CONSTRAINT_PARSERS[constraint](self) 2962 2963 def _parse_unique(self) -> exp.Expression: 2964 if not self._match(TokenType.L_PAREN, advance=False): 2965 return self.expression(exp.UniqueColumnConstraint) 2966 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 2967 2968 def _parse_key_constraint_options(self) -> t.List[str]: 2969 options = [] 2970 while True: 2971 if not self._curr: 2972 break 2973 2974 if self._match(TokenType.ON): 2975 action = None 2976 on = self._advance_any() and self._prev.text 2977 2978 if self._match(TokenType.NO_ACTION): 2979 action = "NO ACTION" 2980 elif self._match(TokenType.CASCADE): 2981 action = "CASCADE" 2982 elif self._match_pair(TokenType.SET, TokenType.NULL): 2983 action = "SET NULL" 2984 
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unknown token: stop collecting options.
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] plus trailing key-constraint options."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" or "update" becomes the kwarg name on exp.ForeignKey.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No column list means this is an inline column constraint, not a table constraint.
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/array literals or {...} struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: an open-ended slice like x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript literals against the dialect's array index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to support chained subscripts, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Turn `this` into a slice if a colon follows, e.g. x[a:b].
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END, then any window suffix."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(c, t, f) or block-style IF c THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse the body of EXTRACT(<part> FROM <expr>); a comma is tolerated instead of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(<expr> AS <type>).

        Args:
            strict: build exp.Cast when True, exp.TryCast when False.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # MySQL-style CAST(x AS CHAR CHARACTER SET <charset>).
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse the tail of STRING_AGG / GROUP_CONCAT style calls into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the body of CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated function form.

        Args:
            haystack_first: in the comma form, whether the first argument is the
                haystack (e.g. some dialects order the arguments that way).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into an exp.JoinHint named after `func_name`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        # Optional LEADING/TRAILING/BOTH prefix.
        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first term was the characters to strip.
            this = self._parse_term()
        else:
            # TRIM(string): no character set was given.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...) / named-window definitions.

        Args:
            this: the expression the window applies to (typically a function call).
            alias: True when parsing a named WINDOW-clause entry instead of OVER.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window, no inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame endpoint (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <n> FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a parenthesized alias list) following `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like keyword into an exp.Identifier.

        Args:
            any_token: allow any non-reserved token to act as an identifier.
            tokens: token types accepted as identifiers (defaults to ID_VAR_TOKENS).
            prefix_tokens: token types that may be glued on as a textual prefix.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder token."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder token."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder token."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or, with any_token, any non-reserved token) into exp.Var."""
        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a var or a string literal, whichever matches first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. ${name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS, rewinding on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined: give the consumed token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * EXCEPT (...) column list, wrapped or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * REPLACE (...) expression list, wrapped or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trailed the separator to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class mapping."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list of `parse_method` items."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a required '(' and ')'."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full SELECT if present, otherwise a scalar expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<mode>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. "READ ONLY".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK keyword itself was consumed by the caller (_prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... DROP [COLUMN] via the generic drop parser."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY item of an ALTER TABLE.

        The introducing token (from ADD_CONSTRAINT_TOKENS) was consumed by the caller.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE with DELETE/ADD/DROP/RENAME TO/ALTER COLUMN actions.

        Anything other than ALTER TABLE is kept verbatim as an exp.Command.
        """
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column can re-consume ADD.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition: rewind so _parse_drop_column can re-consume DROP.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a dialect-specific sub-parser, else a generic exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET-item parser: wrap whatever statement follows."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement, preferring dialect-specific parsers."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)

            whens.append(self.expression(exp.When, this=this, then=then))

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement as a list of SET items."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and keep the raw SQL from `start` as a Command."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word keyword parser by walking `trie` over upcoming tokens.

        Restores the cursor position when no full keyword sequence matches.
        """
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        """Return True (consuming by default) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types):
        """Return True and consume if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (consuming both by default) if the next two tokens match a/b."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
3792 3793 def _match_l_paren(self, expression=None): 3794 if not self._match(TokenType.L_PAREN): 3795 self.raise_error("Expecting (") 3796 if expression and self._prev_comments: 3797 expression.comments = self._prev_comments 3798 3799 def _match_r_paren(self, expression=None): 3800 if not self._match(TokenType.R_PAREN): 3801 self.raise_error("Expecting )") 3802 if expression and self._prev_comments: 3803 expression.comments = self._prev_comments 3804 3805 def _match_texts(self, texts): 3806 if self._curr and self._curr.text.upper() in texts: 3807 self._advance() 3808 return True 3809 return False 3810 3811 def _match_text_seq(self, *texts, advance=True): 3812 index = self._index 3813 for text in texts: 3814 if self._curr and self._curr.text.upper() == text: 3815 self._advance() 3816 else: 3817 self._retreat(index) 3818 return False 3819 3820 if not advance: 3821 self._retreat(index) 3822 3823 return True 3824 3825 def _replace_columns_with_dots(self, this): 3826 if isinstance(this, exp.Dot): 3827 exp.replace_children(this, self._replace_columns_with_dots) 3828 elif isinstance(this, exp.Column): 3829 exp.replace_children(this, self._replace_columns_with_dots) 3830 table = this.args.get("table") 3831 this = ( 3832 self.expression(exp.Dot, this=table, expression=this.this) 3833 if table 3834 else self.expression(exp.Var, this=this.name) 3835 ) 3836 elif isinstance(this, exp.Identifier): 3837 this = self.expression(exp.Var, this=this.name) 3838 return this 3839 3840 def _replace_lambda(self, node, lambda_variables): 3841 if isinstance(node, exp.Column): 3842 if node.name in lambda_variables: 3843 return node.this 3844 return node
43class Parser(metaclass=_Parser): 44 """ 45 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 46 a parsed syntax tree. 47 48 Args: 49 error_level: the desired error level. 50 Default: ErrorLevel.RAISE 51 error_message_context: determines the amount of context to capture from a 52 query string when displaying the error message (in number of characters). 53 Default: 50. 54 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 55 Default: 0 56 alias_post_tablesample: If the table alias comes after tablesample. 57 Default: False 58 max_errors: Maximum number of error messages to include in a raised ParseError. 59 This is only relevant if error_level is ErrorLevel.RAISE. 60 Default: 3 61 null_ordering: Indicates the default null ordering method to use if not explicitly set. 62 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 63 Default: "nulls_are_small" 64 """ 65 66 FUNCTIONS: t.Dict[str, t.Callable] = { 67 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 68 "DATE_TO_DATE_STR": lambda args: exp.Cast( 69 this=seq_get(args, 0), 70 to=exp.DataType(this=exp.DataType.Type.TEXT), 71 ), 72 "TIME_TO_TIME_STR": lambda args: exp.Cast( 73 this=seq_get(args, 0), 74 to=exp.DataType(this=exp.DataType.Type.TEXT), 75 ), 76 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 77 this=exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 start=exp.Literal.number(1), 82 length=exp.Literal.number(10), 83 ), 84 "VAR_MAP": parse_var_map, 85 "IFNULL": exp.Coalesce.from_arg_list, 86 } 87 88 NO_PAREN_FUNCTIONS = { 89 TokenType.CURRENT_DATE: exp.CurrentDate, 90 TokenType.CURRENT_DATETIME: exp.CurrentDate, 91 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 92 } 93 94 NESTED_TYPE_TOKENS = { 95 TokenType.ARRAY, 96 TokenType.MAP, 97 TokenType.STRUCT, 98 TokenType.NULLABLE, 99 } 100 101 TYPE_TOKENS = { 102 TokenType.BOOLEAN, 103 
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate (e.g. `x = ANY (SELECT ...)`).
    # Note both ANY and SOME map to exp.Any — SOME is a synonym in SQL.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that may never be parsed as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Keyword tokens that are nonetheless allowed to act as identifiers
    # (column/table names etc.) when they appear in an identifier position.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens usable as a table alias; excludes tokens that would be ambiguous
    # right after a table reference (e.g. LEFT could start a join).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # In UPDATE statements, SET would be ambiguous as an alias.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid position specifiers for TRIM(LEADING|TRAILING|BOTH ... FROM ...).
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by `(` and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The tables below map operator tokens to the Expression node they build,
    # one table per precedence level of the expression grammar.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like type tokens (used when disambiguating casts/literals).
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that combine two SELECTs into a set operation.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN modifiers: <side> <kind> JOIN (e.g. LEFT OUTER JOIN).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda-style constructs: `args -> body` and `name => value` (kwarg).
    # The ARROW handler rewrites bare column refs in the body that match a
    # lambda parameter name via self._replace_lambda.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that bind directly to a column expression.
    # DOT is special-cased (None) and handled inline by the column parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into(): maps a target Expression type to the
    # parser method that produces it.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Top-level statement dispatch: the leading token selects the parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression parsers. Handlers receive the matched token
    # (or `_` when its text is irrelevant).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter placeholders (`?`, `@param`, `:1`, `:name`).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators applied to an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # DDL property parsers, keyed by the (upper-cased) keyword text.
    # Several handlers inspect self._prev to recover a NO/DUAL/DEFAULT prefix
    # that was consumed before dispatch (see _parse_property_before).
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
    }

    # Column-constraint parsers, keyed by the (upper-cased) keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Constraints that may appear at schema level without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are NOT followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions with special argument grammar (keyed by function name) that
    # cannot be parsed as a plain comma-separated argument list.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # SELECT modifiers parsed after the main body, keyed by the arg name they
    # populate on the query expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Dialect hooks: populated by subclasses; the metaclass builds tries from
    # these so multi-word SHOW/SET commands can be matched.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted after CREATE/DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # BEGIN <kind> TRANSACTION modifiers (SQLite-style).
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
674 675 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 676 677 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 678 679 STRICT_CAST = True 680 681 __slots__ = ( 682 "error_level", 683 "error_message_context", 684 "sql", 685 "errors", 686 "index_offset", 687 "unnest_column_only", 688 "alias_post_tablesample", 689 "max_errors", 690 "null_ordering", 691 "_tokens", 692 "_index", 693 "_curr", 694 "_next", 695 "_prev", 696 "_prev_comments", 697 "_show_trie", 698 "_set_trie", 699 ) 700 701 def __init__( 702 self, 703 error_level: t.Optional[ErrorLevel] = None, 704 error_message_context: int = 100, 705 index_offset: int = 0, 706 unnest_column_only: bool = False, 707 alias_post_tablesample: bool = False, 708 max_errors: int = 3, 709 null_ordering: t.Optional[str] = None, 710 ): 711 self.error_level = error_level or ErrorLevel.IMMEDIATE 712 self.error_message_context = error_message_context 713 self.index_offset = index_offset 714 self.unnest_column_only = unnest_column_only 715 self.alias_post_tablesample = alias_post_tablesample 716 self.max_errors = max_errors 717 self.null_ordering = null_ordering 718 self.reset() 719 720 def reset(self): 721 self.sql = "" 722 self.errors = [] 723 self._tokens = [] 724 self._index = 0 725 self._curr = None 726 self._next = None 727 self._prev = None 728 self._prev_comments = None 729 730 def parse( 731 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 732 ) -> t.List[t.Optional[exp.Expression]]: 733 """ 734 Parses a list of tokens and returns a list of syntax trees, one tree 735 per parsed SQL statement. 736 737 Args: 738 raw_tokens: the list of tokens. 739 sql: the original SQL string, used to produce helpful debug messages. 740 741 Returns: 742 The list of syntax trees. 
743 """ 744 return self._parse( 745 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 746 ) 747 748 def parse_into( 749 self, 750 expression_types: exp.IntoType, 751 raw_tokens: t.List[Token], 752 sql: t.Optional[str] = None, 753 ) -> t.List[t.Optional[exp.Expression]]: 754 """ 755 Parses a list of tokens into a given Expression type. If a collection of Expression 756 types is given instead, this method will try to parse the token list into each one 757 of them, stopping at the first for which the parsing succeeds. 758 759 Args: 760 expression_types: the expression type(s) to try and parse the token list into. 761 raw_tokens: the list of tokens. 762 sql: the original SQL string, used to produce helpful debug messages. 763 764 Returns: 765 The target Expression. 766 """ 767 errors = [] 768 for expression_type in ensure_collection(expression_types): 769 parser = self.EXPRESSION_PARSERS.get(expression_type) 770 if not parser: 771 raise TypeError(f"No parser registered for {expression_type}") 772 try: 773 return self._parse(parser, raw_tokens, sql) 774 except ParseError as e: 775 e.errors[0]["into_expression"] = expression_type 776 errors.append(e) 777 raise ParseError( 778 f"Failed to parse into {expression_types}", 779 errors=merge_errors(errors), 780 ) from errors[-1] 781 782 def _parse( 783 self, 784 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 785 raw_tokens: t.List[Token], 786 sql: t.Optional[str] = None, 787 ) -> t.List[t.Optional[exp.Expression]]: 788 self.reset() 789 self.sql = sql or "" 790 total = len(raw_tokens) 791 chunks: t.List[t.List[Token]] = [[]] 792 793 for i, token in enumerate(raw_tokens): 794 if token.token_type == TokenType.SEMICOLON: 795 if i < total - 1: 796 chunks.append([]) 797 else: 798 chunks[-1].append(token) 799 800 expressions = [] 801 802 for tokens in chunks: 803 self._index = -1 804 self._tokens = tokens 805 self._advance() 806 807 expressions.append(parse_method(self)) 808 809 if 
self._index < len(self._tokens): 810 self.raise_error("Invalid expression / Unexpected token") 811 812 self.check_errors() 813 814 return expressions 815 816 def check_errors(self) -> None: 817 """ 818 Logs or raises any found errors, depending on the chosen error level setting. 819 """ 820 if self.error_level == ErrorLevel.WARN: 821 for error in self.errors: 822 logger.error(str(error)) 823 elif self.error_level == ErrorLevel.RAISE and self.errors: 824 raise ParseError( 825 concat_messages(self.errors, self.max_errors), 826 errors=merge_errors(self.errors), 827 ) 828 829 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 830 """ 831 Appends an error in the list of recorded errors or raises it, depending on the chosen 832 error level setting. 833 """ 834 token = token or self._curr or self._prev or Token.string("") 835 start = self._find_token(token) 836 end = start + len(token.text) 837 start_context = self.sql[max(start - self.error_message_context, 0) : start] 838 highlight = self.sql[start:end] 839 end_context = self.sql[end : end + self.error_message_context] 840 841 error = ParseError.new( 842 f"{message}. Line {token.line}, Col: {token.col}.\n" 843 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 844 description=message, 845 line=token.line, 846 col=token.col, 847 start_context=start_context, 848 highlight=highlight, 849 end_context=end_context, 850 ) 851 852 if self.error_level == ErrorLevel.IMMEDIATE: 853 raise error 854 855 self.errors.append(error) 856 857 def expression( 858 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 859 ) -> exp.Expression: 860 """ 861 Creates a new, validated Expression. 862 863 Args: 864 exp_class: the expression class to instantiate. 865 comments: an optional list of comments to attach to the expression. 866 kwargs: the arguments to set for the expression along with their respective values. 867 868 Returns: 869 The target expression. 
870 """ 871 instance = exp_class(**kwargs) 872 if self._prev_comments: 873 instance.comments = self._prev_comments 874 self._prev_comments = None 875 if comments: 876 instance.comments = comments 877 self.validate_expression(instance) 878 return instance 879 880 def validate_expression( 881 self, expression: exp.Expression, args: t.Optional[t.List] = None 882 ) -> None: 883 """ 884 Validates an already instantiated expression, making sure that all its mandatory arguments 885 are set. 886 887 Args: 888 expression: the expression to validate. 889 args: an optional list of items that was used to instantiate the expression, if it's a Func. 890 """ 891 if self.error_level == ErrorLevel.IGNORE: 892 return 893 894 for error_message in expression.error_messages(args): 895 self.raise_error(error_message) 896 897 def _find_sql(self, start: Token, end: Token) -> str: 898 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 899 900 def _find_token(self, token: Token) -> int: 901 line = 1 902 col = 1 903 index = 0 904 905 while line < token.line or col < token.col: 906 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 907 line += 1 908 col = 1 909 else: 910 col += 1 911 index += 1 912 913 return index 914 915 def _advance(self, times: int = 1) -> None: 916 self._index += times 917 self._curr = seq_get(self._tokens, self._index) 918 self._next = seq_get(self._tokens, self._index + 1) 919 if self._index > 0: 920 self._prev = self._tokens[self._index - 1] 921 self._prev_comments = self._prev.comments 922 else: 923 self._prev = None 924 self._prev_comments = None 925 926 def _retreat(self, index: int) -> None: 927 self._advance(index - self._index) 928 929 def _parse_command(self) -> exp.Expression: 930 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 931 932 def _parse_statement(self) -> t.Optional[exp.Expression]: 933 if self._curr is None: 934 return None 935 936 if 
self._match_set(self.STATEMENT_PARSERS): 937 return self.STATEMENT_PARSERS[self._prev.token_type](self) 938 939 if self._match_set(Tokenizer.COMMANDS): 940 return self._parse_command() 941 942 expression = self._parse_expression() 943 expression = self._parse_set_operations(expression) if expression else self._parse_select() 944 945 self._parse_query_modifiers(expression) 946 return expression 947 948 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 949 start = self._prev 950 temporary = self._match(TokenType.TEMPORARY) 951 materialized = self._match(TokenType.MATERIALIZED) 952 kind = self._match_set(self.CREATABLES) and self._prev.text 953 if not kind: 954 if default_kind: 955 kind = default_kind 956 else: 957 return self._parse_as_command(start) 958 959 return self.expression( 960 exp.Drop, 961 exists=self._parse_exists(), 962 this=self._parse_table(schema=True), 963 kind=kind, 964 temporary=temporary, 965 materialized=materialized, 966 cascade=self._match(TokenType.CASCADE), 967 ) 968 969 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 970 return ( 971 self._match(TokenType.IF) 972 and (not not_ or self._match(TokenType.NOT)) 973 and self._match(TokenType.EXISTS) 974 ) 975 976 def _parse_create(self) -> t.Optional[exp.Expression]: 977 start = self._prev 978 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 979 TokenType.OR, TokenType.REPLACE 980 ) 981 set_ = self._match(TokenType.SET) # Teradata 982 multiset = self._match_text_seq("MULTISET") # Teradata 983 global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata 984 volatile = self._match(TokenType.VOLATILE) # Teradata 985 temporary = self._match(TokenType.TEMPORARY) 986 transient = self._match_text_seq("TRANSIENT") 987 external = self._match_text_seq("EXTERNAL") 988 unique = self._match(TokenType.UNIQUE) 989 materialized = self._match(TokenType.MATERIALIZED) 990 991 if self._match_pair(TokenType.TABLE, 
TokenType.FUNCTION, advance=False): 992 self._match(TokenType.TABLE) 993 994 properties = None 995 create_token = self._match_set(self.CREATABLES) and self._prev 996 997 if not create_token: 998 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 999 create_token = self._match_set(self.CREATABLES) and self._prev 1000 1001 if not properties or not create_token: 1002 return self._parse_as_command(start) 1003 1004 exists = self._parse_exists(not_=True) 1005 this = None 1006 expression = None 1007 data = None 1008 statistics = None 1009 no_primary_index = None 1010 indexes = None 1011 no_schema_binding = None 1012 begin = None 1013 1014 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1015 this = self._parse_user_defined_function(kind=create_token.token_type) 1016 properties = self._parse_properties() 1017 1018 self._match(TokenType.ALIAS) 1019 begin = self._match(TokenType.BEGIN) 1020 return_ = self._match_text_seq("RETURN") 1021 expression = self._parse_statement() 1022 1023 if return_: 1024 expression = self.expression(exp.Return, this=expression) 1025 elif create_token.token_type == TokenType.INDEX: 1026 this = self._parse_index() 1027 elif create_token.token_type in ( 1028 TokenType.TABLE, 1029 TokenType.VIEW, 1030 TokenType.SCHEMA, 1031 ): 1032 table_parts = self._parse_table_parts(schema=True) 1033 1034 # exp.Properties.Location.POST_NAME 1035 if self._match(TokenType.COMMA): 1036 temp_properties = self._parse_properties(before=True) 1037 if properties and temp_properties: 1038 properties.expressions.append(temp_properties.expressions) 1039 elif temp_properties: 1040 properties = temp_properties 1041 1042 this = self._parse_schema(this=table_parts) 1043 1044 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1045 temp_properties = self._parse_properties() 1046 if properties and temp_properties: 1047 properties.expressions.append(temp_properties.expressions) 1048 elif temp_properties: 1049 properties = 
temp_properties 1050 1051 self._match(TokenType.ALIAS) 1052 1053 # exp.Properties.Location.POST_ALIAS 1054 if not ( 1055 self._match(TokenType.SELECT, advance=False) 1056 or self._match(TokenType.WITH, advance=False) 1057 or self._match(TokenType.L_PAREN, advance=False) 1058 ): 1059 temp_properties = self._parse_properties() 1060 if properties and temp_properties: 1061 properties.expressions.append(temp_properties.expressions) 1062 elif temp_properties: 1063 properties = temp_properties 1064 1065 expression = self._parse_ddl_select() 1066 1067 if create_token.token_type == TokenType.TABLE: 1068 if self._match_text_seq("WITH", "DATA"): 1069 data = True 1070 elif self._match_text_seq("WITH", "NO", "DATA"): 1071 data = False 1072 1073 if self._match_text_seq("AND", "STATISTICS"): 1074 statistics = True 1075 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1076 statistics = False 1077 1078 no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX") 1079 1080 indexes = [] 1081 while True: 1082 index = self._parse_create_table_index() 1083 1084 # exp.Properties.Location.POST_INDEX 1085 if self._match(TokenType.PARTITION_BY, advance=False): 1086 temp_properties = self._parse_properties() 1087 if properties and temp_properties: 1088 properties.expressions.append(temp_properties.expressions) 1089 elif temp_properties: 1090 properties = temp_properties 1091 1092 if not index: 1093 break 1094 else: 1095 indexes.append(index) 1096 elif create_token.token_type == TokenType.VIEW: 1097 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1098 no_schema_binding = True 1099 1100 return self.expression( 1101 exp.Create, 1102 this=this, 1103 kind=create_token.text, 1104 expression=expression, 1105 set=set_, 1106 multiset=multiset, 1107 global_temporary=global_temporary, 1108 volatile=volatile, 1109 exists=exists, 1110 properties=properties, 1111 temporary=temporary, 1112 transient=transient, 1113 external=external, 1114 replace=replace, 1115 unique=unique, 1116 
materialized=materialized, 1117 data=data, 1118 statistics=statistics, 1119 no_primary_index=no_primary_index, 1120 indexes=indexes, 1121 no_schema_binding=no_schema_binding, 1122 begin=begin, 1123 ) 1124 1125 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1126 self._match(TokenType.COMMA) 1127 1128 # parsers look to _prev for no/dual/default, so need to consume first 1129 self._match_text_seq("NO") 1130 self._match_text_seq("DUAL") 1131 self._match_text_seq("DEFAULT") 1132 1133 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1134 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1135 1136 return None 1137 1138 def _parse_property(self) -> t.Optional[exp.Expression]: 1139 if self._match_texts(self.PROPERTY_PARSERS): 1140 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1141 1142 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1143 return self._parse_character_set(default=True) 1144 1145 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1146 return self._parse_sortkey(compound=True) 1147 1148 if self._match_text_seq("SQL", "SECURITY"): 1149 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1150 1151 assignment = self._match_pair( 1152 TokenType.VAR, TokenType.EQ, advance=False 1153 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1154 1155 if assignment: 1156 key = self._parse_var_or_string() 1157 self._match(TokenType.EQ) 1158 return self.expression(exp.Property, this=key, value=self._parse_column()) 1159 1160 return None 1161 1162 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1163 self._match(TokenType.EQ) 1164 self._match(TokenType.ALIAS) 1165 return self.expression( 1166 exp_class, 1167 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1168 ) 1169 1170 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1171 properties = [] 1172 1173 
while True: 1174 if before: 1175 identified_property = self._parse_property_before() 1176 else: 1177 identified_property = self._parse_property() 1178 1179 if not identified_property: 1180 break 1181 for p in ensure_collection(identified_property): 1182 properties.append(p) 1183 1184 if properties: 1185 return self.expression(exp.Properties, expressions=properties) 1186 1187 return None 1188 1189 def _parse_fallback(self, no=False) -> exp.Expression: 1190 self._match_text_seq("FALLBACK") 1191 return self.expression( 1192 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1193 ) 1194 1195 def _parse_with_property( 1196 self, 1197 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1198 if self._match(TokenType.L_PAREN, advance=False): 1199 return self._parse_wrapped_csv(self._parse_property) 1200 1201 if not self._next: 1202 return None 1203 1204 if self._next.text.upper() == "JOURNAL": 1205 return self._parse_withjournaltable() 1206 1207 return self._parse_withisolatedloading() 1208 1209 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1210 def _parse_definer(self) -> t.Optional[exp.Expression]: 1211 self._match(TokenType.EQ) 1212 1213 user = self._parse_id_var() 1214 self._match(TokenType.PARAMETER) 1215 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1216 1217 if not user or not host: 1218 return None 1219 1220 return exp.DefinerProperty(this=f"{user}@{host}") 1221 1222 def _parse_withjournaltable(self) -> exp.Expression: 1223 self._match_text_seq("WITH", "JOURNAL", "TABLE") 1224 self._match(TokenType.EQ) 1225 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1226 1227 def _parse_log(self, no=False) -> exp.Expression: 1228 self._match_text_seq("LOG") 1229 return self.expression(exp.LogProperty, no=no) 1230 1231 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1232 before = self._match_text_seq("BEFORE") 1233 
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # NOT and LOCAL are optional modifiers; consume them if present before
        # the mandatory AFTER JOURNAL keywords.
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None when neither ON nor OFF
        # is given, which is distinct from explicitly OFF (False).
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # Either an explicit `= <number> [PERCENT]` or the NO/DEFAULT flag form
        # passed in by the caller.
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # Forms: DEFAULT DATABLOCKSIZE | MIN[IMUM]/MAX[IMUM] DATABLOCKSIZE
        # | DATABLOCKSIZE = <number> [BYTES|KBYTES|KILOBYTES]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        # LOCKING <TABLE|VIEW|ROW|DATABASE> [<name>] <FOR|IN> <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks take no object name; the others do.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns an empty list (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        # CREATE ... (LIKE <table> [INCLUDING|EXCLUDING <option>]*)
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # A dangling INCLUDING/EXCLUDING with no option name aborts the parse.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<...>] (BigQuery-style angle brackets)
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # Handles INSERT [OVERWRITE] [LOCAL] DIRECTORY ... as well as
        # INSERT [OR <alternative>] [INTO] [TABLE] <table> ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        # Caller has already consumed ROW; expect FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive: ROW FORMAT SERDE '<class>' | ROW FORMAT DELIMITED <clauses>
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # ESCAPED BY is only valid immediately after FIELDS TERMINATED BY.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        # Hive: LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <t> ...
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
        )

    def _parse_update(self) -> exp.Expression:
        # `from` is a Python keyword, hence the dict-splat instead of kwargs.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # Spark: CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # A single VALUES row: either a parenthesized tuple or a bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
return self.expression(exp.Tuple, expressions=expressions) 1606 1607 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1608 # Source: https://prestodb.io/docs/current/sql/values.html 1609 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1610 1611 def _parse_select( 1612 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1613 ) -> t.Optional[exp.Expression]: 1614 cte = self._parse_with() 1615 if cte: 1616 this = self._parse_statement() 1617 1618 if not this: 1619 self.raise_error("Failed to parse any statement following CTE") 1620 return cte 1621 1622 if "with" in this.arg_types: 1623 this.set("with", cte) 1624 else: 1625 self.raise_error(f"{this.key} does not support CTE") 1626 this = cte 1627 elif self._match(TokenType.SELECT): 1628 comments = self._prev_comments 1629 1630 hint = self._parse_hint() 1631 all_ = self._match(TokenType.ALL) 1632 distinct = self._match(TokenType.DISTINCT) 1633 1634 if distinct: 1635 distinct = self.expression( 1636 exp.Distinct, 1637 on=self._parse_value() if self._match(TokenType.ON) else None, 1638 ) 1639 1640 if all_ and distinct: 1641 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1642 1643 limit = self._parse_limit(top=True) 1644 expressions = self._parse_csv(self._parse_expression) 1645 1646 this = self.expression( 1647 exp.Select, 1648 hint=hint, 1649 distinct=distinct, 1650 expressions=expressions, 1651 limit=limit, 1652 ) 1653 this.comments = comments 1654 1655 into = self._parse_into() 1656 if into: 1657 this.set("into", into) 1658 1659 from_ = self._parse_from() 1660 if from_: 1661 this.set("from", from_) 1662 1663 self._parse_query_modifiers(this) 1664 elif (table or nested) and self._match(TokenType.L_PAREN): 1665 this = self._parse_table() if table else self._parse_select(nested=True) 1666 self._parse_query_modifiers(this) 1667 this = self._parse_set_operations(this) 1668 self._match_r_paren() 1669 1670 # early return so 
that subquery unions aren't parsed again 1671 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1672 # Union ALL should be a property of the top select node, not the subquery 1673 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1674 elif self._match(TokenType.VALUES): 1675 this = self.expression( 1676 exp.Values, 1677 expressions=self._parse_csv(self._parse_value), 1678 alias=self._parse_table_alias(), 1679 ) 1680 else: 1681 this = None 1682 1683 return self._parse_set_operations(this) 1684 1685 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1686 if not skip_with_token and not self._match(TokenType.WITH): 1687 return None 1688 1689 recursive = self._match(TokenType.RECURSIVE) 1690 1691 expressions = [] 1692 while True: 1693 expressions.append(self._parse_cte()) 1694 1695 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1696 break 1697 else: 1698 self._match(TokenType.WITH) 1699 1700 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1701 1702 def _parse_cte(self) -> exp.Expression: 1703 alias = self._parse_table_alias() 1704 if not alias or not alias.this: 1705 self.raise_error("Expected CTE to have alias") 1706 1707 self._match(TokenType.ALIAS) 1708 1709 return self.expression( 1710 exp.CTE, 1711 this=self._parse_wrapped(self._parse_statement), 1712 alias=alias, 1713 ) 1714 1715 def _parse_table_alias( 1716 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1717 ) -> t.Optional[exp.Expression]: 1718 any_token = self._match(TokenType.ALIAS) 1719 alias = self._parse_id_var( 1720 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1721 ) 1722 index = self._index 1723 1724 if self._match(TokenType.L_PAREN): 1725 columns = self._parse_csv(self._parse_function_parameter) 1726 self._match_r_paren() if columns else self._retreat(index) 1727 else: 1728 columns = None 1729 1730 if not alias and not columns: 1731 return None 1732 1733 return 
self.expression(exp.TableAlias, this=alias, columns=columns) 1734 1735 def _parse_subquery( 1736 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1737 ) -> exp.Expression: 1738 return self.expression( 1739 exp.Subquery, 1740 this=this, 1741 pivots=self._parse_pivots(), 1742 alias=self._parse_table_alias() if parse_alias else None, 1743 ) 1744 1745 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1746 if not isinstance(this, self.MODIFIABLES): 1747 return 1748 1749 table = isinstance(this, exp.Table) 1750 1751 while True: 1752 lateral = self._parse_lateral() 1753 join = self._parse_join() 1754 comma = None if table else self._match(TokenType.COMMA) 1755 if lateral: 1756 this.append("laterals", lateral) 1757 if join: 1758 this.append("joins", join) 1759 if comma: 1760 this.args["from"].append("expressions", self._parse_table()) 1761 if not (lateral or join or comma): 1762 break 1763 1764 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1765 expression = parser(self) 1766 1767 if expression: 1768 this.set(key, expression) 1769 1770 def _parse_hint(self) -> t.Optional[exp.Expression]: 1771 if self._match(TokenType.HINT): 1772 hints = self._parse_csv(self._parse_function) 1773 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1774 self.raise_error("Expected */ after HINT") 1775 return self.expression(exp.Hint, expressions=hints) 1776 1777 return None 1778 1779 def _parse_into(self) -> t.Optional[exp.Expression]: 1780 if not self._match(TokenType.INTO): 1781 return None 1782 1783 temp = self._match(TokenType.TEMPORARY) 1784 unlogged = self._match(TokenType.UNLOGGED) 1785 self._match(TokenType.TABLE) 1786 1787 return self.expression( 1788 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1789 ) 1790 1791 def _parse_from(self) -> t.Optional[exp.Expression]: 1792 if not self._match(TokenType.FROM): 1793 return None 1794 1795 return self.expression( 1796 exp.From, 
comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1797 ) 1798 1799 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1800 if not self._match(TokenType.MATCH_RECOGNIZE): 1801 return None 1802 self._match_l_paren() 1803 1804 partition = self._parse_partition_by() 1805 order = self._parse_order() 1806 measures = ( 1807 self._parse_alias(self._parse_conjunction()) 1808 if self._match_text_seq("MEASURES") 1809 else None 1810 ) 1811 1812 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1813 rows = exp.Var(this="ONE ROW PER MATCH") 1814 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1815 text = "ALL ROWS PER MATCH" 1816 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1817 text += f" SHOW EMPTY MATCHES" 1818 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1819 text += f" OMIT EMPTY MATCHES" 1820 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1821 text += f" WITH UNMATCHED ROWS" 1822 rows = exp.Var(this=text) 1823 else: 1824 rows = None 1825 1826 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1827 text = "AFTER MATCH SKIP" 1828 if self._match_text_seq("PAST", "LAST", "ROW"): 1829 text += f" PAST LAST ROW" 1830 elif self._match_text_seq("TO", "NEXT", "ROW"): 1831 text += f" TO NEXT ROW" 1832 elif self._match_text_seq("TO", "FIRST"): 1833 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1834 elif self._match_text_seq("TO", "LAST"): 1835 text += f" TO LAST {self._advance_any().text}" # type: ignore 1836 after = exp.Var(this=text) 1837 else: 1838 after = None 1839 1840 if self._match_text_seq("PATTERN"): 1841 self._match_l_paren() 1842 1843 if not self._curr: 1844 self.raise_error("Expecting )", self._curr) 1845 1846 paren = 1 1847 start = self._curr 1848 1849 while self._curr and paren > 0: 1850 if self._curr.token_type == TokenType.L_PAREN: 1851 paren += 1 1852 if self._curr.token_type == TokenType.R_PAREN: 1853 paren -= 1 1854 end = self._prev 1855 self._advance() 1856 if paren > 
0: 1857 self.raise_error("Expecting )", self._curr) 1858 pattern = exp.Var(this=self._find_sql(start, end)) 1859 else: 1860 pattern = None 1861 1862 define = ( 1863 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1864 ) 1865 self._match_r_paren() 1866 1867 return self.expression( 1868 exp.MatchRecognize, 1869 partition_by=partition, 1870 order=order, 1871 measures=measures, 1872 rows=rows, 1873 after=after, 1874 pattern=pattern, 1875 define=define, 1876 ) 1877 1878 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1879 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1880 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1881 1882 if outer_apply or cross_apply: 1883 this = self._parse_select(table=True) 1884 view = None 1885 outer = not cross_apply 1886 elif self._match(TokenType.LATERAL): 1887 this = self._parse_select(table=True) 1888 view = self._match(TokenType.VIEW) 1889 outer = self._match(TokenType.OUTER) 1890 else: 1891 return None 1892 1893 if not this: 1894 this = self._parse_function() or self._parse_id_var(any_token=False) 1895 while self._match(TokenType.DOT): 1896 this = exp.Dot( 1897 this=this, 1898 expression=self._parse_function() or self._parse_id_var(any_token=False), 1899 ) 1900 1901 table_alias: t.Optional[exp.Expression] 1902 1903 if view: 1904 table = self._parse_id_var(any_token=False) 1905 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1906 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1907 else: 1908 table_alias = self._parse_table_alias() 1909 1910 expression = self.expression( 1911 exp.Lateral, 1912 this=this, 1913 view=view, 1914 outer=outer, 1915 alias=table_alias, 1916 ) 1917 1918 if outer_apply or cross_apply: 1919 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1920 1921 return expression 1922 1923 def _parse_join_side_and_kind( 1924 self, 1925 ) -> 
t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1926 return ( 1927 self._match(TokenType.NATURAL) and self._prev, 1928 self._match_set(self.JOIN_SIDES) and self._prev, 1929 self._match_set(self.JOIN_KINDS) and self._prev, 1930 ) 1931 1932 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1933 natural, side, kind = self._parse_join_side_and_kind() 1934 1935 if not skip_join_token and not self._match(TokenType.JOIN): 1936 return None 1937 1938 kwargs: t.Dict[ 1939 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1940 ] = {"this": self._parse_table()} 1941 1942 if natural: 1943 kwargs["natural"] = True 1944 if side: 1945 kwargs["side"] = side.text 1946 if kind: 1947 kwargs["kind"] = kind.text 1948 1949 if self._match(TokenType.ON): 1950 kwargs["on"] = self._parse_conjunction() 1951 elif self._match(TokenType.USING): 1952 kwargs["using"] = self._parse_wrapped_id_vars() 1953 1954 return self.expression(exp.Join, **kwargs) # type: ignore 1955 1956 def _parse_index(self) -> exp.Expression: 1957 index = self._parse_id_var() 1958 self._match(TokenType.ON) 1959 self._match(TokenType.TABLE) # hive 1960 1961 return self.expression( 1962 exp.Index, 1963 this=index, 1964 table=self.expression(exp.Table, this=self._parse_id_var()), 1965 columns=self._parse_expression(), 1966 ) 1967 1968 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 1969 unique = self._match(TokenType.UNIQUE) 1970 primary = self._match_text_seq("PRIMARY") 1971 amp = self._match_text_seq("AMP") 1972 if not self._match(TokenType.INDEX): 1973 return None 1974 index = self._parse_id_var() 1975 columns = None 1976 if self._match(TokenType.L_PAREN, advance=False): 1977 columns = self._parse_wrapped_csv(self._parse_column) 1978 return self.expression( 1979 exp.Index, 1980 this=index, 1981 columns=columns, 1982 unique=unique, 1983 primary=primary, 1984 amp=amp, 1985 ) 1986 1987 def _parse_table_parts(self, schema: 
bool = False) -> exp.Expression: 1988 catalog = None 1989 db = None 1990 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 1991 1992 while self._match(TokenType.DOT): 1993 if catalog: 1994 # This allows nesting the table in arbitrarily many dot expressions if needed 1995 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 1996 else: 1997 catalog = db 1998 db = table 1999 table = self._parse_id_var() 2000 2001 if not table: 2002 self.raise_error(f"Expected table name but got {self._curr}") 2003 2004 return self.expression( 2005 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2006 ) 2007 2008 def _parse_table( 2009 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2010 ) -> t.Optional[exp.Expression]: 2011 lateral = self._parse_lateral() 2012 2013 if lateral: 2014 return lateral 2015 2016 unnest = self._parse_unnest() 2017 2018 if unnest: 2019 return unnest 2020 2021 values = self._parse_derived_table_values() 2022 2023 if values: 2024 return values 2025 2026 subquery = self._parse_select(table=True) 2027 2028 if subquery: 2029 return subquery 2030 2031 this = self._parse_table_parts(schema=schema) 2032 2033 if schema: 2034 return self._parse_schema(this=this) 2035 2036 if self.alias_post_tablesample: 2037 table_sample = self._parse_table_sample() 2038 2039 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2040 2041 if alias: 2042 this.set("alias", alias) 2043 2044 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2045 this.set( 2046 "hints", 2047 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2048 ) 2049 self._match_r_paren() 2050 2051 if not self.alias_post_tablesample: 2052 table_sample = self._parse_table_sample() 2053 2054 if table_sample: 2055 table_sample.set("this", this) 2056 this = table_sample 2057 2058 return this 2059 2060 def _parse_unnest(self) -> 
t.Optional[exp.Expression]: 2061 if not self._match(TokenType.UNNEST): 2062 return None 2063 2064 expressions = self._parse_wrapped_csv(self._parse_column) 2065 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2066 alias = self._parse_table_alias() 2067 2068 if alias and self.unnest_column_only: 2069 if alias.args.get("columns"): 2070 self.raise_error("Unexpected extra column alias in unnest.") 2071 alias.set("columns", [alias.this]) 2072 alias.set("this", None) 2073 2074 offset = None 2075 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2076 self._match(TokenType.ALIAS) 2077 offset = self._parse_conjunction() 2078 2079 return self.expression( 2080 exp.Unnest, 2081 expressions=expressions, 2082 ordinality=ordinality, 2083 alias=alias, 2084 offset=offset, 2085 ) 2086 2087 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2088 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2089 if not is_derived and not self._match(TokenType.VALUES): 2090 return None 2091 2092 expressions = self._parse_csv(self._parse_value) 2093 2094 if is_derived: 2095 self._match_r_paren() 2096 2097 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2098 2099 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2100 if not self._match(TokenType.TABLE_SAMPLE): 2101 return None 2102 2103 method = self._parse_var() 2104 bucket_numerator = None 2105 bucket_denominator = None 2106 bucket_field = None 2107 percent = None 2108 rows = None 2109 size = None 2110 seed = None 2111 2112 self._match_l_paren() 2113 2114 if self._match(TokenType.BUCKET): 2115 bucket_numerator = self._parse_number() 2116 self._match(TokenType.OUT_OF) 2117 bucket_denominator = bucket_denominator = self._parse_number() 2118 self._match(TokenType.ON) 2119 bucket_field = self._parse_field() 2120 else: 2121 num = self._parse_number() 2122 2123 if self._match(TokenType.PERCENT): 2124 percent = num 2125 
            elif self._match(TokenType.ROWS):
                rows = num
            else:
                size = num

        self._match_r_paren()

        if self._match(TokenType.SEED):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        # Repeatedly call _parse_pivot until it returns None (iter sentinel form).
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT/UNPIVOT without a following paren is not a pivot clause here;
        # rewind so the tokens can be parsed as something else.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # GROUP BY can mix plain expressions, GROUPING SETS, ROLLUP and CUBE in
        # any order; keep looping until a pass matches nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE store True; the functional forms store columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        # Normalize ASC/DESC and NULLS FIRST/LAST into explicit flags, applying
        # the dialect's default null ordering when none was written.
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        # Handles LIMIT n, TOP n / TOP (n), and ANSI FETCH FIRST/NEXT n ROWS ONLY.
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # COMMA covers the MySQL `LIMIT offset, count` form.
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        # DISTINCT is the default unless ALL is written; recurse so chained set
        # operations nest to the right.
        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        # IS [NOT] DISTINCT FROM maps to null-safe (in)equality.
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A lone subquery becomes `query`; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def
_parse_between(self, this: exp.Expression) -> exp.Expression: 2416 low = self._parse_bitwise() 2417 self._match(TokenType.AND) 2418 high = self._parse_bitwise() 2419 return self.expression(exp.Between, this=this, low=low, high=high) 2420 2421 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2422 if not self._match(TokenType.ESCAPE): 2423 return this 2424 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2425 2426 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2427 this = self._parse_term() 2428 2429 while True: 2430 if self._match_set(self.BITWISE): 2431 this = self.expression( 2432 self.BITWISE[self._prev.token_type], 2433 this=this, 2434 expression=self._parse_term(), 2435 ) 2436 elif self._match_pair(TokenType.LT, TokenType.LT): 2437 this = self.expression( 2438 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2439 ) 2440 elif self._match_pair(TokenType.GT, TokenType.GT): 2441 this = self.expression( 2442 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2443 ) 2444 else: 2445 break 2446 2447 return this 2448 2449 def _parse_term(self) -> t.Optional[exp.Expression]: 2450 return self._parse_tokens(self._parse_factor, self.TERM) 2451 2452 def _parse_factor(self) -> t.Optional[exp.Expression]: 2453 return self._parse_tokens(self._parse_unary, self.FACTOR) 2454 2455 def _parse_unary(self) -> t.Optional[exp.Expression]: 2456 if self._match_set(self.UNARY_PARSERS): 2457 return self.UNARY_PARSERS[self._prev.token_type](self) 2458 return self._parse_at_time_zone(self._parse_type()) 2459 2460 def _parse_type(self) -> t.Optional[exp.Expression]: 2461 if self._match(TokenType.INTERVAL): 2462 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2463 2464 index = self._index 2465 type_token = self._parse_types(check_func=True) 2466 this = self._parse_column() 2467 2468 if type_token: 2469 if this and not isinstance(this, exp.Star): 2470 
return self.expression(exp.Cast, this=this, to=type_token) 2471 if not type_token.args.get("expressions"): 2472 self._retreat(index) 2473 return self._parse_column() 2474 return type_token 2475 2476 return this 2477 2478 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2479 index = self._index 2480 2481 prefix = self._match_text_seq("SYSUDTLIB", ".") 2482 2483 if not self._match_set(self.TYPE_TOKENS): 2484 return None 2485 2486 type_token = self._prev.token_type 2487 2488 if type_token == TokenType.PSEUDO_TYPE: 2489 return self.expression(exp.PseudoType, this=self._prev.text) 2490 2491 nested = type_token in self.NESTED_TYPE_TOKENS 2492 is_struct = type_token == TokenType.STRUCT 2493 expressions = None 2494 maybe_func = False 2495 2496 if self._match(TokenType.L_PAREN): 2497 if is_struct: 2498 expressions = self._parse_csv(self._parse_struct_kwargs) 2499 elif nested: 2500 expressions = self._parse_csv(self._parse_types) 2501 else: 2502 expressions = self._parse_csv(self._parse_conjunction) 2503 2504 if not expressions: 2505 self._retreat(index) 2506 return None 2507 2508 self._match_r_paren() 2509 maybe_func = True 2510 2511 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2512 this = exp.DataType( 2513 this=exp.DataType.Type.ARRAY, 2514 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2515 nested=True, 2516 ) 2517 2518 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2519 this = exp.DataType( 2520 this=exp.DataType.Type.ARRAY, 2521 expressions=[this], 2522 nested=True, 2523 ) 2524 2525 return this 2526 2527 if self._match(TokenType.L_BRACKET): 2528 self._retreat(index) 2529 return None 2530 2531 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2532 if nested and self._match(TokenType.LT): 2533 if is_struct: 2534 expressions = self._parse_csv(self._parse_struct_kwargs) 2535 else: 2536 expressions = self._parse_csv(self._parse_types) 2537 2538 if not 
self._match(TokenType.GT): 2539 self.raise_error("Expecting >") 2540 2541 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2542 values = self._parse_csv(self._parse_conjunction) 2543 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2544 2545 value: t.Optional[exp.Expression] = None 2546 if type_token in self.TIMESTAMPS: 2547 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2548 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2549 elif ( 2550 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2551 ): 2552 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2553 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2554 if type_token == TokenType.TIME: 2555 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2556 else: 2557 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2558 2559 maybe_func = maybe_func and value is None 2560 2561 if value is None: 2562 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2563 elif type_token == TokenType.INTERVAL: 2564 value = self.expression(exp.Interval, unit=self._parse_var()) 2565 2566 if maybe_func and check_func: 2567 index2 = self._index 2568 peek = self._parse_string() 2569 2570 if not peek: 2571 self._retreat(index) 2572 return None 2573 2574 self._retreat(index2) 2575 2576 if value: 2577 return value 2578 2579 return exp.DataType( 2580 this=exp.DataType.Type[type_token.value.upper()], 2581 expressions=expressions, 2582 nested=nested, 2583 values=values, 2584 prefix=prefix, 2585 ) 2586 2587 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2588 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2589 return self._parse_types() 2590 2591 this = self._parse_id_var() 2592 self._match(TokenType.COLON) 2593 data_type = self._parse_types() 2594 2595 if not data_type: 2596 return None 
2597 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2598 2599 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2600 if not self._match(TokenType.AT_TIME_ZONE): 2601 return this 2602 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2603 2604 def _parse_column(self) -> t.Optional[exp.Expression]: 2605 this = self._parse_field() 2606 if isinstance(this, exp.Identifier): 2607 this = self.expression(exp.Column, this=this) 2608 elif not this: 2609 return self._parse_bracket(this) 2610 this = self._parse_bracket(this) 2611 2612 while self._match_set(self.COLUMN_OPERATORS): 2613 op_token = self._prev.token_type 2614 op = self.COLUMN_OPERATORS.get(op_token) 2615 2616 if op_token == TokenType.DCOLON: 2617 field = self._parse_types() 2618 if not field: 2619 self.raise_error("Expected type") 2620 elif op: 2621 self._advance() 2622 value = self._prev.text 2623 field = ( 2624 exp.Literal.number(value) 2625 if self._prev.token_type == TokenType.NUMBER 2626 else exp.Literal.string(value) 2627 ) 2628 else: 2629 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2630 2631 if isinstance(field, exp.Func): 2632 # bigquery allows function calls like x.y.count(...) 2633 # SAFE.SUBSTR(...) 
2634 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2635 this = self._replace_columns_with_dots(this) 2636 2637 if op: 2638 this = op(self, this, field) 2639 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2640 this = self.expression( 2641 exp.Column, 2642 this=field, 2643 table=this.this, 2644 db=this.args.get("table"), 2645 catalog=this.args.get("db"), 2646 ) 2647 else: 2648 this = self.expression(exp.Dot, this=this, expression=field) 2649 this = self._parse_bracket(this) 2650 2651 return this 2652 2653 def _parse_primary(self) -> t.Optional[exp.Expression]: 2654 if self._match_set(self.PRIMARY_PARSERS): 2655 token_type = self._prev.token_type 2656 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2657 2658 if token_type == TokenType.STRING: 2659 expressions = [primary] 2660 while self._match(TokenType.STRING): 2661 expressions.append(exp.Literal.string(self._prev.text)) 2662 if len(expressions) > 1: 2663 return self.expression(exp.Concat, expressions=expressions) 2664 return primary 2665 2666 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2667 return exp.Literal.number(f"0.{self._prev.text}") 2668 2669 if self._match(TokenType.L_PAREN): 2670 comments = self._prev_comments 2671 query = self._parse_select() 2672 2673 if query: 2674 expressions = [query] 2675 else: 2676 expressions = self._parse_csv( 2677 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2678 ) 2679 2680 this = seq_get(expressions, 0) 2681 self._parse_query_modifiers(this) 2682 self._match_r_paren() 2683 2684 if isinstance(this, exp.Subqueryable): 2685 this = self._parse_set_operations( 2686 self._parse_subquery(this=this, parse_alias=False) 2687 ) 2688 elif len(expressions) > 1: 2689 this = self.expression(exp.Tuple, expressions=expressions) 2690 else: 2691 this = self.expression(exp.Paren, this=this) 2692 2693 if this and comments: 2694 this.comments = comments 2695 2696 return this 
2697 2698 return None 2699 2700 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2701 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2702 2703 def _parse_function( 2704 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2705 ) -> t.Optional[exp.Expression]: 2706 if not self._curr: 2707 return None 2708 2709 token_type = self._curr.token_type 2710 2711 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2712 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2713 2714 if not self._next or self._next.token_type != TokenType.L_PAREN: 2715 if token_type in self.NO_PAREN_FUNCTIONS: 2716 self._advance() 2717 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2718 2719 return None 2720 2721 if token_type not in self.FUNC_TOKENS: 2722 return None 2723 2724 this = self._curr.text 2725 upper = this.upper() 2726 self._advance(2) 2727 2728 parser = self.FUNCTION_PARSERS.get(upper) 2729 2730 if parser: 2731 this = parser(self) 2732 else: 2733 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2734 2735 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2736 this = self.expression(subquery_predicate, this=self._parse_select()) 2737 self._match_r_paren() 2738 return this 2739 2740 if functions is None: 2741 functions = self.FUNCTIONS 2742 2743 function = functions.get(upper) 2744 args = self._parse_csv(self._parse_lambda) 2745 2746 if function: 2747 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2748 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
2749 if count_params(function) == 2: 2750 params = None 2751 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2752 params = self._parse_csv(self._parse_lambda) 2753 2754 this = function(args, params) 2755 else: 2756 this = function(args) 2757 2758 self.validate_expression(this, args) 2759 else: 2760 this = self.expression(exp.Anonymous, this=this, expressions=args) 2761 2762 self._match_r_paren(this) 2763 return self._parse_window(this) 2764 2765 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2766 return self._parse_column_def(self._parse_id_var()) 2767 2768 def _parse_user_defined_function( 2769 self, kind: t.Optional[TokenType] = None 2770 ) -> t.Optional[exp.Expression]: 2771 this = self._parse_id_var() 2772 2773 while self._match(TokenType.DOT): 2774 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2775 2776 if not self._match(TokenType.L_PAREN): 2777 return this 2778 2779 expressions = self._parse_csv(self._parse_function_parameter) 2780 self._match_r_paren() 2781 return self.expression( 2782 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2783 ) 2784 2785 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2786 literal = self._parse_primary() 2787 if literal: 2788 return self.expression(exp.Introducer, this=token.text, expression=literal) 2789 2790 return self.expression(exp.Identifier, this=token.text) 2791 2792 def _parse_national(self, token: Token) -> exp.Expression: 2793 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2794 2795 def _parse_session_parameter(self) -> exp.Expression: 2796 kind = None 2797 this = self._parse_id_var() or self._parse_primary() 2798 2799 if this and self._match(TokenType.DOT): 2800 kind = this.name 2801 this = self._parse_var() or self._parse_primary() 2802 2803 return self.expression(exp.SessionParameter, this=this, kind=kind) 2804 2805 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2806 
index = self._index 2807 2808 if self._match(TokenType.L_PAREN): 2809 expressions = self._parse_csv(self._parse_id_var) 2810 2811 if not self._match(TokenType.R_PAREN): 2812 self._retreat(index) 2813 else: 2814 expressions = [self._parse_id_var()] 2815 2816 if self._match_set(self.LAMBDAS): 2817 return self.LAMBDAS[self._prev.token_type](self, expressions) 2818 2819 self._retreat(index) 2820 2821 this: t.Optional[exp.Expression] 2822 2823 if self._match(TokenType.DISTINCT): 2824 this = self.expression( 2825 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2826 ) 2827 else: 2828 this = self._parse_select_or_expression() 2829 2830 if self._match(TokenType.IGNORE_NULLS): 2831 this = self.expression(exp.IgnoreNulls, this=this) 2832 else: 2833 self._match(TokenType.RESPECT_NULLS) 2834 2835 return self._parse_limit(self._parse_order(this)) 2836 2837 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2838 index = self._index 2839 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2840 self._retreat(index) 2841 return this 2842 2843 args = self._parse_csv( 2844 lambda: self._parse_constraint() 2845 or self._parse_column_def(self._parse_field(any_token=True)) 2846 ) 2847 self._match_r_paren() 2848 return self.expression(exp.Schema, this=this, expressions=args) 2849 2850 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2851 kind = self._parse_types() 2852 2853 if self._match_text_seq("FOR", "ORDINALITY"): 2854 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2855 2856 constraints = [] 2857 while True: 2858 constraint = self._parse_column_constraint() 2859 if not constraint: 2860 break 2861 constraints.append(constraint) 2862 2863 if not kind and not constraints: 2864 return this 2865 2866 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2867 2868 def _parse_auto_increment(self) -> exp.Expression: 
2869 start = None 2870 increment = None 2871 2872 if self._match(TokenType.L_PAREN, advance=False): 2873 args = self._parse_wrapped_csv(self._parse_bitwise) 2874 start = seq_get(args, 0) 2875 increment = seq_get(args, 1) 2876 elif self._match_text_seq("START"): 2877 start = self._parse_bitwise() 2878 self._match_text_seq("INCREMENT") 2879 increment = self._parse_bitwise() 2880 2881 if start and increment: 2882 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2883 2884 return exp.AutoIncrementColumnConstraint() 2885 2886 def _parse_generated_as_identity(self) -> exp.Expression: 2887 if self._match(TokenType.BY_DEFAULT): 2888 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2889 else: 2890 self._match_text_seq("ALWAYS") 2891 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2892 2893 self._match_text_seq("AS", "IDENTITY") 2894 if self._match(TokenType.L_PAREN): 2895 if self._match_text_seq("START", "WITH"): 2896 this.set("start", self._parse_bitwise()) 2897 if self._match_text_seq("INCREMENT", "BY"): 2898 this.set("increment", self._parse_bitwise()) 2899 if self._match_text_seq("MINVALUE"): 2900 this.set("minvalue", self._parse_bitwise()) 2901 if self._match_text_seq("MAXVALUE"): 2902 this.set("maxvalue", self._parse_bitwise()) 2903 2904 if self._match_text_seq("CYCLE"): 2905 this.set("cycle", True) 2906 elif self._match_text_seq("NO", "CYCLE"): 2907 this.set("cycle", False) 2908 2909 self._match_r_paren() 2910 2911 return this 2912 2913 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2914 if self._match_text_seq("NULL"): 2915 return self.expression(exp.NotNullColumnConstraint) 2916 if self._match_text_seq("CASESPECIFIC"): 2917 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2918 return None 2919 2920 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2921 this = self._parse_references() 2922 if this: 2923 return this 2924 2925 if 
self._match(TokenType.CONSTRAINT): 2926 this = self._parse_id_var() 2927 2928 if self._match_texts(self.CONSTRAINT_PARSERS): 2929 return self.expression( 2930 exp.ColumnConstraint, 2931 this=this, 2932 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 2933 ) 2934 2935 return this 2936 2937 def _parse_constraint(self) -> t.Optional[exp.Expression]: 2938 if not self._match(TokenType.CONSTRAINT): 2939 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 2940 2941 this = self._parse_id_var() 2942 expressions = [] 2943 2944 while True: 2945 constraint = self._parse_unnamed_constraint() or self._parse_function() 2946 if not constraint: 2947 break 2948 expressions.append(constraint) 2949 2950 return self.expression(exp.Constraint, this=this, expressions=expressions) 2951 2952 def _parse_unnamed_constraint( 2953 self, constraints: t.Optional[t.Collection[str]] = None 2954 ) -> t.Optional[exp.Expression]: 2955 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 2956 return None 2957 2958 constraint = self._prev.text.upper() 2959 if constraint not in self.CONSTRAINT_PARSERS: 2960 self.raise_error(f"No parser found for schema constraint {constraint}.") 2961 2962 return self.CONSTRAINT_PARSERS[constraint](self) 2963 2964 def _parse_unique(self) -> exp.Expression: 2965 if not self._match(TokenType.L_PAREN, advance=False): 2966 return self.expression(exp.UniqueColumnConstraint) 2967 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 2968 2969 def _parse_key_constraint_options(self) -> t.List[str]: 2970 options = [] 2971 while True: 2972 if not self._curr: 2973 break 2974 2975 if self._match(TokenType.ON): 2976 action = None 2977 on = self._advance_any() and self._prev.text 2978 2979 if self._match(TokenType.NO_ACTION): 2980 action = "NO ACTION" 2981 elif self._match(TokenType.CASCADE): 2982 action = "CASCADE" 2983 elif self._match_pair(TokenType.SET, TokenType.NULL): 2984 action = "SET NULL" 2985 
elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 2986 action = "SET DEFAULT" 2987 else: 2988 self.raise_error("Invalid key constraint") 2989 2990 options.append(f"ON {on} {action}") 2991 elif self._match_text_seq("NOT", "ENFORCED"): 2992 options.append("NOT ENFORCED") 2993 elif self._match_text_seq("DEFERRABLE"): 2994 options.append("DEFERRABLE") 2995 elif self._match_text_seq("INITIALLY", "DEFERRED"): 2996 options.append("INITIALLY DEFERRED") 2997 elif self._match_text_seq("NORELY"): 2998 options.append("NORELY") 2999 elif self._match_text_seq("MATCH", "FULL"): 3000 options.append("MATCH FULL") 3001 else: 3002 break 3003 3004 return options 3005 3006 def _parse_references(self) -> t.Optional[exp.Expression]: 3007 if not self._match(TokenType.REFERENCES): 3008 return None 3009 3010 expressions = None 3011 this = self._parse_id_var() 3012 3013 if self._match(TokenType.L_PAREN, advance=False): 3014 expressions = self._parse_wrapped_id_vars() 3015 3016 options = self._parse_key_constraint_options() 3017 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3018 3019 def _parse_foreign_key(self) -> exp.Expression: 3020 expressions = self._parse_wrapped_id_vars() 3021 reference = self._parse_references() 3022 options = {} 3023 3024 while self._match(TokenType.ON): 3025 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3026 self.raise_error("Expected DELETE or UPDATE") 3027 3028 kind = self._prev.text.lower() 3029 3030 if self._match(TokenType.NO_ACTION): 3031 action = "NO ACTION" 3032 elif self._match(TokenType.SET): 3033 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3034 action = "SET " + self._prev.text.upper() 3035 else: 3036 self._advance() 3037 action = self._prev.text.upper() 3038 3039 options[kind] = action 3040 3041 return self.expression( 3042 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3043 ) 3044 3045 def _parse_primary_key(self) -> exp.Expression: 3046 
desc = ( 3047 self._match_set((TokenType.ASC, TokenType.DESC)) 3048 and self._prev.token_type == TokenType.DESC 3049 ) 3050 3051 if not self._match(TokenType.L_PAREN, advance=False): 3052 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3053 3054 expressions = self._parse_wrapped_id_vars() 3055 options = self._parse_key_constraint_options() 3056 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3057 3058 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3059 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3060 return this 3061 3062 bracket_kind = self._prev.token_type 3063 expressions: t.List[t.Optional[exp.Expression]] 3064 3065 if self._match(TokenType.COLON): 3066 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3067 else: 3068 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3069 3070 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3071 if bracket_kind == TokenType.L_BRACE: 3072 this = self.expression(exp.Struct, expressions=expressions) 3073 elif not this or this.name.upper() == "ARRAY": 3074 this = self.expression(exp.Array, expressions=expressions) 3075 else: 3076 expressions = apply_index_offset(expressions, -self.index_offset) 3077 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3078 3079 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3080 self.raise_error("Expected ]") 3081 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3082 self.raise_error("Expected }") 3083 3084 this.comments = self._prev_comments 3085 return self._parse_bracket(this) 3086 3087 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3088 if self._match(TokenType.COLON): 3089 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3090 return this 
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN branches,
        optional ELSE default; END is required.
        """
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, true, false) and
        IF cond THEN true [ELSE false] END.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma instead of FROM is tolerated."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict=False` yields TryCast instead.

        A CHAR target may carry a CHARACTER SET qualifier.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG into exp.GroupConcat, handling the Postgres inline
        ORDER BY form and the WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-argument form;
        `haystack_first` flips the comma-form argument order.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table list of a join hint call into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first term was the trim characters.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause into a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition (name AS (...))."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes around `this`: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and the OVER clause.
        NOTE(review): this method continues beyond this chunk.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
3307 if alias: 3308 self._match(TokenType.ALIAS) 3309 elif not self._match(TokenType.OVER): 3310 return this 3311 3312 if not self._match(TokenType.L_PAREN): 3313 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3314 3315 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3316 partition = self._parse_partition_by() 3317 order = self._parse_order() 3318 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3319 3320 if kind: 3321 self._match(TokenType.BETWEEN) 3322 start = self._parse_window_spec() 3323 self._match(TokenType.AND) 3324 end = self._parse_window_spec() 3325 3326 spec = self.expression( 3327 exp.WindowSpec, 3328 kind=kind, 3329 start=start["value"], 3330 start_side=start["side"], 3331 end=end["value"], 3332 end_side=end["side"], 3333 ) 3334 else: 3335 spec = None 3336 3337 self._match_r_paren() 3338 3339 return self.expression( 3340 exp.Window, 3341 this=this, 3342 partition_by=partition, 3343 order=order, 3344 spec=spec, 3345 alias=window_alias, 3346 ) 3347 3348 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3349 self._match(TokenType.BETWEEN) 3350 3351 return { 3352 "value": ( 3353 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3354 ) 3355 or self._parse_bitwise(), 3356 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3357 } 3358 3359 def _parse_alias( 3360 self, this: t.Optional[exp.Expression], explicit: bool = False 3361 ) -> t.Optional[exp.Expression]: 3362 any_token = self._match(TokenType.ALIAS) 3363 3364 if explicit and not any_token: 3365 return this 3366 3367 if self._match(TokenType.L_PAREN): 3368 aliases = self.expression( 3369 exp.Aliases, 3370 this=this, 3371 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3372 ) 3373 self._match_r_paren(aliases) 3374 return aliases 3375 3376 alias = self._parse_id_var(any_token) 3377 3378 if 
alias: 3379 return self.expression(exp.Alias, this=this, alias=alias) 3380 3381 return this 3382 3383 def _parse_id_var( 3384 self, 3385 any_token: bool = True, 3386 tokens: t.Optional[t.Collection[TokenType]] = None, 3387 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3388 ) -> t.Optional[exp.Expression]: 3389 identifier = self._parse_identifier() 3390 3391 if identifier: 3392 return identifier 3393 3394 prefix = "" 3395 3396 if prefix_tokens: 3397 while self._match_set(prefix_tokens): 3398 prefix += self._prev.text 3399 3400 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3401 quoted = self._prev.token_type == TokenType.STRING 3402 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3403 3404 return None 3405 3406 def _parse_string(self) -> t.Optional[exp.Expression]: 3407 if self._match(TokenType.STRING): 3408 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3409 return self._parse_placeholder() 3410 3411 def _parse_number(self) -> t.Optional[exp.Expression]: 3412 if self._match(TokenType.NUMBER): 3413 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3414 return self._parse_placeholder() 3415 3416 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3417 if self._match(TokenType.IDENTIFIER): 3418 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3419 return self._parse_placeholder() 3420 3421 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: 3422 if (any_token and self._advance_any()) or self._match(TokenType.VAR): 3423 return self.expression(exp.Var, this=self._prev.text) 3424 return self._parse_placeholder() 3425 3426 def _advance_any(self) -> t.Optional[Token]: 3427 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3428 self._advance() 3429 return self._prev 3430 return None 3431 3432 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3433 return self._parse_var() or 
self._parse_string() 3434 3435 def _parse_null(self) -> t.Optional[exp.Expression]: 3436 if self._match(TokenType.NULL): 3437 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3438 return None 3439 3440 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3441 if self._match(TokenType.TRUE): 3442 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3443 if self._match(TokenType.FALSE): 3444 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3445 return None 3446 3447 def _parse_star(self) -> t.Optional[exp.Expression]: 3448 if self._match(TokenType.STAR): 3449 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3450 return None 3451 3452 def _parse_parameter(self) -> exp.Expression: 3453 wrapped = self._match(TokenType.L_BRACE) 3454 this = self._parse_var() or self._parse_primary() 3455 self._match(TokenType.R_BRACE) 3456 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3457 3458 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3459 if self._match_set(self.PLACEHOLDER_PARSERS): 3460 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3461 if placeholder: 3462 return placeholder 3463 self._advance(-1) 3464 return None 3465 3466 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3467 if not self._match(TokenType.EXCEPT): 3468 return None 3469 if self._match(TokenType.L_PAREN, advance=False): 3470 return self._parse_wrapped_csv(self._parse_column) 3471 return self._parse_csv(self._parse_column) 3472 3473 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3474 if not self._match(TokenType.REPLACE): 3475 return None 3476 if self._match(TokenType.L_PAREN, advance=False): 3477 return self._parse_wrapped_csv(self._parse_expression) 3478 return self._parse_csv(self._parse_expression) 3479 3480 def _parse_csv( 3481 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3482 ) -> t.List[t.Optional[exp.Expression]]: 3483 
parse_result = parse_method() 3484 items = [parse_result] if parse_result is not None else [] 3485 3486 while self._match(sep): 3487 if parse_result and self._prev_comments: 3488 parse_result.comments = self._prev_comments 3489 3490 parse_result = parse_method() 3491 if parse_result is not None: 3492 items.append(parse_result) 3493 3494 return items 3495 3496 def _parse_tokens( 3497 self, parse_method: t.Callable, expressions: t.Dict 3498 ) -> t.Optional[exp.Expression]: 3499 this = parse_method() 3500 3501 while self._match_set(expressions): 3502 this = self.expression( 3503 expressions[self._prev.token_type], 3504 this=this, 3505 comments=self._prev_comments, 3506 expression=parse_method(), 3507 ) 3508 3509 return this 3510 3511 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3512 return self._parse_wrapped_csv(self._parse_id_var) 3513 3514 def _parse_wrapped_csv( 3515 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3516 ) -> t.List[t.Optional[exp.Expression]]: 3517 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3518 3519 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3520 self._match_l_paren() 3521 parse_result = parse_method() 3522 self._match_r_paren() 3523 return parse_result 3524 3525 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3526 return self._parse_select() or self._parse_expression() 3527 3528 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3529 return self._parse_set_operations( 3530 self._parse_select(nested=True, parse_subquery_alias=False) 3531 ) 3532 3533 def _parse_transaction(self) -> exp.Expression: 3534 this = None 3535 if self._match_texts(self.TRANSACTION_KIND): 3536 this = self._prev.text 3537 3538 self._match_texts({"TRANSACTION", "WORK"}) 3539 3540 modes = [] 3541 while True: 3542 mode = [] 3543 while self._match(TokenType.VAR): 3544 mode.append(self._prev.text) 3545 3546 if mode: 3547 modes.append(" ".join(mode)) 3548 if 
not self._match(TokenType.COMMA): 3549 break 3550 3551 return self.expression(exp.Transaction, this=this, modes=modes) 3552 3553 def _parse_commit_or_rollback(self) -> exp.Expression: 3554 chain = None 3555 savepoint = None 3556 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3557 3558 self._match_texts({"TRANSACTION", "WORK"}) 3559 3560 if self._match_text_seq("TO"): 3561 self._match_text_seq("SAVEPOINT") 3562 savepoint = self._parse_id_var() 3563 3564 if self._match(TokenType.AND): 3565 chain = not self._match_text_seq("NO") 3566 self._match_text_seq("CHAIN") 3567 3568 if is_rollback: 3569 return self.expression(exp.Rollback, savepoint=savepoint) 3570 return self.expression(exp.Commit, chain=chain) 3571 3572 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3573 if not self._match_text_seq("ADD"): 3574 return None 3575 3576 self._match(TokenType.COLUMN) 3577 exists_column = self._parse_exists(not_=True) 3578 expression = self._parse_column_def(self._parse_field(any_token=True)) 3579 3580 if expression: 3581 expression.set("exists", exists_column) 3582 3583 return expression 3584 3585 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3586 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3587 3588 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3589 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3590 return self.expression( 3591 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3592 ) 3593 3594 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3595 this = None 3596 kind = self._prev.token_type 3597 3598 if kind == TokenType.CONSTRAINT: 3599 this = self._parse_id_var() 3600 3601 if self._match_text_seq("CHECK"): 3602 expression = self._parse_wrapped(self._parse_conjunction) 3603 enforced = self._match_text_seq("ENFORCED") 3604 3605 return self.expression( 3606 exp.AddConstraint, this=this, 
expression=expression, enforced=enforced 3607 ) 3608 3609 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3610 expression = self._parse_foreign_key() 3611 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3612 expression = self._parse_primary_key() 3613 3614 return self.expression(exp.AddConstraint, this=this, expression=expression) 3615 3616 def _parse_alter(self) -> t.Optional[exp.Expression]: 3617 if not self._match(TokenType.TABLE): 3618 return self._parse_as_command(self._prev) 3619 3620 exists = self._parse_exists() 3621 this = self._parse_table(schema=True) 3622 3623 actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None 3624 3625 index = self._index 3626 if self._match(TokenType.DELETE): 3627 actions = [self.expression(exp.Delete, where=self._parse_where())] 3628 elif self._match_text_seq("ADD"): 3629 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3630 actions = self._parse_csv(self._parse_add_constraint) 3631 else: 3632 self._retreat(index) 3633 actions = self._parse_csv(self._parse_add_column) 3634 elif self._match_text_seq("DROP"): 3635 partition_exists = self._parse_exists() 3636 3637 if self._match(TokenType.PARTITION, advance=False): 3638 actions = self._parse_csv( 3639 lambda: self._parse_drop_partition(exists=partition_exists) 3640 ) 3641 else: 3642 self._retreat(index) 3643 actions = self._parse_csv(self._parse_drop_column) 3644 elif self._match_text_seq("RENAME", "TO"): 3645 actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3646 elif self._match_text_seq("ALTER"): 3647 self._match(TokenType.COLUMN) 3648 column = self._parse_field(any_token=True) 3649 3650 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3651 actions = self.expression(exp.AlterColumn, this=column, drop=True) 3652 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3653 actions = self.expression( 3654 exp.AlterColumn, this=column, default=self._parse_conjunction() 3655 
) 3656 else: 3657 self._match_text_seq("SET", "DATA") 3658 actions = self.expression( 3659 exp.AlterColumn, 3660 this=column, 3661 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3662 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3663 using=self._match(TokenType.USING) and self._parse_conjunction(), 3664 ) 3665 3666 actions = ensure_list(actions) 3667 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) 3668 3669 def _parse_show(self) -> t.Optional[exp.Expression]: 3670 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3671 if parser: 3672 return parser(self) 3673 self._advance() 3674 return self.expression(exp.Show, this=self._prev.text.upper()) 3675 3676 def _default_parse_set_item(self) -> exp.Expression: 3677 return self.expression( 3678 exp.SetItem, 3679 this=self._parse_statement(), 3680 ) 3681 3682 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3683 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3684 return parser(self) if parser else self._default_parse_set_item() 3685 3686 def _parse_merge(self) -> exp.Expression: 3687 self._match(TokenType.INTO) 3688 target = self._parse_table() 3689 3690 self._match(TokenType.USING) 3691 using = self._parse_table() 3692 3693 self._match(TokenType.ON) 3694 on = self._parse_conjunction() 3695 3696 whens = [] 3697 while self._match(TokenType.WHEN): 3698 this = self._parse_conjunction() 3699 self._match(TokenType.THEN) 3700 3701 if self._match(TokenType.INSERT): 3702 _this = self._parse_star() 3703 if _this: 3704 then = self.expression(exp.Insert, this=_this) 3705 else: 3706 then = self.expression( 3707 exp.Insert, 3708 this=self._parse_value(), 3709 expression=self._match(TokenType.VALUES) and self._parse_value(), 3710 ) 3711 elif self._match(TokenType.UPDATE): 3712 expressions = self._parse_star() 3713 if expressions: 3714 then = self.expression(exp.Update, expressions=expressions) 3715 else: 3716 
then = self.expression( 3717 exp.Update, 3718 expressions=self._match(TokenType.SET) 3719 and self._parse_csv(self._parse_equality), 3720 ) 3721 elif self._match(TokenType.DELETE): 3722 then = self.expression(exp.Var, this=self._prev.text) 3723 3724 whens.append(self.expression(exp.When, this=this, then=then)) 3725 3726 return self.expression( 3727 exp.Merge, 3728 this=target, 3729 using=using, 3730 on=on, 3731 expressions=whens, 3732 ) 3733 3734 def _parse_set(self) -> exp.Expression: 3735 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3736 3737 def _parse_as_command(self, start: Token) -> exp.Command: 3738 while self._curr: 3739 self._advance() 3740 return exp.Command(this=self._find_sql(start, self._prev)) 3741 3742 def _find_parser( 3743 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3744 ) -> t.Optional[t.Callable]: 3745 index = self._index 3746 this = [] 3747 while True: 3748 # The current token might be multiple words 3749 curr = self._curr.text.upper() 3750 key = curr.split(" ") 3751 this.append(curr) 3752 self._advance() 3753 result, trie = in_trie(trie, key) 3754 if result == 0: 3755 break 3756 if result == 2: 3757 subparser = parsers[" ".join(this)] 3758 return subparser 3759 self._retreat(index) 3760 return None 3761 3762 def _match(self, token_type, advance=True): 3763 if not self._curr: 3764 return None 3765 3766 if self._curr.token_type == token_type: 3767 if advance: 3768 self._advance() 3769 return True 3770 3771 return None 3772 3773 def _match_set(self, types): 3774 if not self._curr: 3775 return None 3776 3777 if self._curr.token_type in types: 3778 self._advance() 3779 return True 3780 3781 return None 3782 3783 def _match_pair(self, token_type_a, token_type_b, advance=True): 3784 if not self._curr or not self._next: 3785 return None 3786 3787 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3788 if advance: 3789 self._advance(2) 3790 return True 3791 3792 return None 
3793 3794 def _match_l_paren(self, expression=None): 3795 if not self._match(TokenType.L_PAREN): 3796 self.raise_error("Expecting (") 3797 if expression and self._prev_comments: 3798 expression.comments = self._prev_comments 3799 3800 def _match_r_paren(self, expression=None): 3801 if not self._match(TokenType.R_PAREN): 3802 self.raise_error("Expecting )") 3803 if expression and self._prev_comments: 3804 expression.comments = self._prev_comments 3805 3806 def _match_texts(self, texts): 3807 if self._curr and self._curr.text.upper() in texts: 3808 self._advance() 3809 return True 3810 return False 3811 3812 def _match_text_seq(self, *texts, advance=True): 3813 index = self._index 3814 for text in texts: 3815 if self._curr and self._curr.text.upper() == text: 3816 self._advance() 3817 else: 3818 self._retreat(index) 3819 return False 3820 3821 if not advance: 3822 self._retreat(index) 3823 3824 return True 3825 3826 def _replace_columns_with_dots(self, this): 3827 if isinstance(this, exp.Dot): 3828 exp.replace_children(this, self._replace_columns_with_dots) 3829 elif isinstance(this, exp.Column): 3830 exp.replace_children(this, self._replace_columns_with_dots) 3831 table = this.args.get("table") 3832 this = ( 3833 self.expression(exp.Dot, this=table, expression=this.this) 3834 if table 3835 else self.expression(exp.Var, this=this.name) 3836 ) 3837 elif isinstance(this, exp.Identifier): 3838 this = self.expression(exp.Var, this=this.name) 3839 return this 3840 3841 def _replace_lambda(self, node, lambda_variables): 3842 if isinstance(node, exp.Column): 3843 if node.name in lambda_variables: 3844 return node.this 3845 return node
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
701 def __init__( 702 self, 703 error_level: t.Optional[ErrorLevel] = None, 704 error_message_context: int = 100, 705 index_offset: int = 0, 706 unnest_column_only: bool = False, 707 alias_post_tablesample: bool = False, 708 max_errors: int = 3, 709 null_ordering: t.Optional[str] = None, 710 ): 711 self.error_level = error_level or ErrorLevel.IMMEDIATE 712 self.error_message_context = error_message_context 713 self.index_offset = index_offset 714 self.unnest_column_only = unnest_column_only 715 self.alias_post_tablesample = alias_post_tablesample 716 self.max_errors = max_errors 717 self.null_ordering = null_ordering 718 self.reset()
730 def parse( 731 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 732 ) -> t.List[t.Optional[exp.Expression]]: 733 """ 734 Parses a list of tokens and returns a list of syntax trees, one tree 735 per parsed SQL statement. 736 737 Args: 738 raw_tokens: the list of tokens. 739 sql: the original SQL string, used to produce helpful debug messages. 740 741 Returns: 742 The list of syntax trees. 743 """ 744 return self._parse( 745 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 746 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
748 def parse_into( 749 self, 750 expression_types: exp.IntoType, 751 raw_tokens: t.List[Token], 752 sql: t.Optional[str] = None, 753 ) -> t.List[t.Optional[exp.Expression]]: 754 """ 755 Parses a list of tokens into a given Expression type. If a collection of Expression 756 types is given instead, this method will try to parse the token list into each one 757 of them, stopping at the first for which the parsing succeeds. 758 759 Args: 760 expression_types: the expression type(s) to try and parse the token list into. 761 raw_tokens: the list of tokens. 762 sql: the original SQL string, used to produce helpful debug messages. 763 764 Returns: 765 The target Expression. 766 """ 767 errors = [] 768 for expression_type in ensure_collection(expression_types): 769 parser = self.EXPRESSION_PARSERS.get(expression_type) 770 if not parser: 771 raise TypeError(f"No parser registered for {expression_type}") 772 try: 773 return self._parse(parser, raw_tokens, sql) 774 except ParseError as e: 775 e.errors[0]["into_expression"] = expression_type 776 errors.append(e) 777 raise ParseError( 778 f"Failed to parse into {expression_types}", 779 errors=merge_errors(errors), 780 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
816 def check_errors(self) -> None: 817 """ 818 Logs or raises any found errors, depending on the chosen error level setting. 819 """ 820 if self.error_level == ErrorLevel.WARN: 821 for error in self.errors: 822 logger.error(str(error)) 823 elif self.error_level == ErrorLevel.RAISE and self.errors: 824 raise ParseError( 825 concat_messages(self.errors, self.max_errors), 826 errors=merge_errors(self.errors), 827 )
Logs or raises any found errors, depending on the chosen error level setting.
829 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 830 """ 831 Appends an error in the list of recorded errors or raises it, depending on the chosen 832 error level setting. 833 """ 834 token = token or self._curr or self._prev or Token.string("") 835 start = self._find_token(token) 836 end = start + len(token.text) 837 start_context = self.sql[max(start - self.error_message_context, 0) : start] 838 highlight = self.sql[start:end] 839 end_context = self.sql[end : end + self.error_message_context] 840 841 error = ParseError.new( 842 f"{message}. Line {token.line}, Col: {token.col}.\n" 843 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 844 description=message, 845 line=token.line, 846 col=token.col, 847 start_context=start_context, 848 highlight=highlight, 849 end_context=end_context, 850 ) 851 852 if self.error_level == ErrorLevel.IMMEDIATE: 853 raise error 854 855 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
857 def expression( 858 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 859 ) -> exp.Expression: 860 """ 861 Creates a new, validated Expression. 862 863 Args: 864 exp_class: the expression class to instantiate. 865 comments: an optional list of comments to attach to the expression. 866 kwargs: the arguments to set for the expression along with their respective values. 867 868 Returns: 869 The target expression. 870 """ 871 instance = exp_class(**kwargs) 872 if self._prev_comments: 873 instance.comments = self._prev_comments 874 self._prev_comments = None 875 if comments: 876 instance.comments = comments 877 self.validate_expression(instance) 878 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
880 def validate_expression( 881 self, expression: exp.Expression, args: t.Optional[t.List] = None 882 ) -> None: 883 """ 884 Validates an already instantiated expression, making sure that all its mandatory arguments 885 are set. 886 887 Args: 888 expression: the expression to validate. 889 args: an optional list of items that was used to instantiate the expression, if it's a Func. 890 """ 891 if self.error_level == ErrorLevel.IGNORE: 892 return 893 894 for error_message in expression.error_messages(args): 895 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.