Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5
   6from sqlglot import exp
   7from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   8from sqlglot.helper import (
   9    apply_index_offset,
  10    count_params,
  11    ensure_collection,
  12    ensure_list,
  13    seq_get,
  14)
  15from sqlglot.tokens import Token, Tokenizer, TokenType
  16from sqlglot.trie import in_trie, new_trie
  17
# Module-level logger shared by the parser; configured by the application.
logger = logging.getLogger("sqlglot")
  19
  20
  21def parse_var_map(args):
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33class _Parser(type):
  34    def __new__(cls, clsname, bases, attrs):
  35        klass = super().__new__(cls, clsname, bases, attrs)
  36        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  37        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  38        return klass
  39
  40
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE (``__init__`` falls back to it when None).
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  63
    # Maps upper-case SQL function names to callables that build the
    # corresponding exp.Func node from a parsed argument list. Seeded with
    # every function known to `exp`, then extended with special cases.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Cast to TEXT, then keep only the first 10 chars (YYYY-MM-DD).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }
  85
    # Functions that may be called without parentheses (e.g. CURRENT_DATE).
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate — confirm intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Type tokens that can contain other types (e.g. ARRAY<INT>).
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }
  98
    # Every token that can start a data type, including the nested ones.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate; SOME is a synonym for ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that can never be used as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 157
    # Keyword tokens that may also be used as identifiers (non-reserved words).
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ALWAYS,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.GENERATED,
        TokenType.IDENTITY,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SCHEMA_COMMENT,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed as table aliases; join-related tokens are
    # excluded because they would be ambiguous while parsing joins.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Alias tokens allowed in UPDATE statements (SET would be ambiguous there).
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Modifiers accepted by TRIM([LEADING | TRAILING | BOTH] ... FROM ...).
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 252
    # Tokens that may appear as a function name immediately before '('.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator token -> expression class tables, one per precedence
    # level (lowest first: conjunction, equality, comparison, bitwise,
    # additive term, multiplicative factor).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like type tokens.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Set operation keywords (UNION / INTERSECT / EXCEPT).
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN side keywords.
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # JOIN kind keywords.
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }
 351
    # Parsers for lambda-like constructs, keyed by the arrow token that
    # separates the parameter list from the body.
    LAMBDAS = {
        # x -> expr: lambda whose body may reference the parameters by name.
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        # name => expr: keyword argument.
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression. DOT is special
    # cased (None) and handled inline by the column parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        # expr::type  (cast)
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        # JSON/JSONB extraction operators (->, ->>, #>, #>>).
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        # expr ? key  (JSONB containment)
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 400
    # Maps Expression classes to the parser method that produces them; used
    # by parse_into() to target a specific expression type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Maps a statement's leading token to the method that parses it.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE | WAREHOUSE | DATABASE | SCHEMA] name
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 452
    # Prefix unary operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary expression parsers, keyed by token type. Each receives
    # the token that triggered it.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        # SELECT * [EXCEPT (...)] [REPLACE (...)]
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder parsers (?, @name, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self.expression(
            exp.Parameter, this=self._parse_var() or self._parse_primary()
        ),
        # ':' only forms a placeholder when followed by a number or name.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators that take the already-parsed left side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }
 515
    # Parsers for CREATE-statement properties, keyed by the (upper-case)
    # keyword text. Several entries inspect self._prev to distinguish
    # variants such as "NO <prop>" or "DUAL <prop>".
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        # DETERMINISTIC is normalized to IMMUTABLE.
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
    }

    # Column/table constraint parsers, keyed by token type.
    CONSTRAINT_PARSERS = {
        TokenType.CHECK: lambda self: self.expression(
            exp.Check, this=self._parse_wrapped(self._parse_conjunction)
        ),
        TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(),
        TokenType.UNIQUE: lambda self: self._parse_unique(),
        TokenType.LIKE: lambda self: self._parse_create_like(),
    }

    # Function-like constructs that take no parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
    }

    # Functions whose argument lists need bespoke parsing (e.g. EXTRACT(x FROM y)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }
 606
    # Parsers for the clauses that can modify a query, keyed by the arg name
    # they populate on the query expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # SHOW/SET parsers are empty here; dialects populate them and the
    # _Parser metaclass builds the matching tries.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted by CREATE/DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Transaction kinds accepted by BEGIN <kind> TRANSACTION.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # ROWS would be ambiguous inside a window definition.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that can follow ALTER TABLE ... ADD.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST of an invalid value should error (dialects may override).
    STRICT_CAST = True
 645
    # Fixed attribute set: configuration plus per-parse cursor state.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 665
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store the parser configuration and initialize per-parse state via reset()."""
        # A missing error_level falls back to IMMEDIATE (raise at first error).
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
 684
 685    def reset(self):
 686        self.sql = ""
 687        self.errors = []
 688        self._tokens = []
 689        self._index = 0
 690        self._curr = None
 691        self._next = None
 692        self._prev = None
 693        self._prev_comments = None
 694
 695    def parse(
 696        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 697    ) -> t.List[t.Optional[exp.Expression]]:
 698        """
 699        Parses a list of tokens and returns a list of syntax trees, one tree
 700        per parsed SQL statement.
 701
 702        Args:
 703            raw_tokens: the list of tokens.
 704            sql: the original SQL string, used to produce helpful debug messages.
 705
 706        Returns:
 707            The list of syntax trees.
 708        """
 709        return self._parse(
 710            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 711        )
 712
 713    def parse_into(
 714        self,
 715        expression_types: exp.IntoType,
 716        raw_tokens: t.List[Token],
 717        sql: t.Optional[str] = None,
 718    ) -> t.List[t.Optional[exp.Expression]]:
 719        """
 720        Parses a list of tokens into a given Expression type. If a collection of Expression
 721        types is given instead, this method will try to parse the token list into each one
 722        of them, stopping at the first for which the parsing succeeds.
 723
 724        Args:
 725            expression_types: the expression type(s) to try and parse the token list into.
 726            raw_tokens: the list of tokens.
 727            sql: the original SQL string, used to produce helpful debug messages.
 728
 729        Returns:
 730            The target Expression.
 731        """
 732        errors = []
 733        for expression_type in ensure_collection(expression_types):
 734            parser = self.EXPRESSION_PARSERS.get(expression_type)
 735            if not parser:
 736                raise TypeError(f"No parser registered for {expression_type}")
 737            try:
 738                return self._parse(parser, raw_tokens, sql)
 739            except ParseError as e:
 740                e.errors[0]["into_expression"] = expression_type
 741                errors.append(e)
 742        raise ParseError(
 743            f"Failed to parse into {expression_types}",
 744            errors=merge_errors(errors),
 745        ) from errors[-1]
 746
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the tokens on semicolons and run `parse_method` once per statement chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Semicolons delimit statements and are dropped; a trailing semicolon
        # deliberately does not open a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1  # _advance() moves the cursor to index 0
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Unconsumed tokens mean the statement did not parse completely.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 780
 781    def check_errors(self) -> None:
 782        """
 783        Logs or raises any found errors, depending on the chosen error level setting.
 784        """
 785        if self.error_level == ErrorLevel.WARN:
 786            for error in self.errors:
 787                logger.error(str(error))
 788        elif self.error_level == ErrorLevel.RAISE and self.errors:
 789            raise ParseError(
 790                concat_messages(self.errors, self.max_errors),
 791                errors=merge_errors(self.errors),
 792            )
 793
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current or previous token so the error still points
        # somewhere sensible when no explicit token is given.
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        # Slice the surrounding SQL to give the user context around the error.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending text is underlined in the message via ANSI escapes.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 821
 822    def expression(
 823        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 824    ) -> exp.Expression:
 825        """
 826        Creates a new, validated Expression.
 827
 828        Args:
 829            exp_class: the expression class to instantiate.
 830            comments: an optional list of comments to attach to the expression.
 831            kwargs: the arguments to set for the expression along with their respective values.
 832
 833        Returns:
 834            The target expression.
 835        """
 836        instance = exp_class(**kwargs)
 837        if self._prev_comments:
 838            instance.comments = self._prev_comments
 839            self._prev_comments = None
 840        if comments:
 841            instance.comments = comments
 842        self.validate_expression(instance)
 843        return instance
 844
 845    def validate_expression(
 846        self, expression: exp.Expression, args: t.Optional[t.List] = None
 847    ) -> None:
 848        """
 849        Validates an already instantiated expression, making sure that all its mandatory arguments
 850        are set.
 851
 852        Args:
 853            expression: the expression to validate.
 854            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 855        """
 856        if self.error_level == ErrorLevel.IGNORE:
 857            return
 858
 859        for error_message in expression.error_messages(args):
 860            self.raise_error(error_message)
 861
 862    def _find_sql(self, start: Token, end: Token) -> str:
 863        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 864
 865    def _find_token(self, token: Token) -> int:
 866        line = 1
 867        col = 1
 868        index = 0
 869
 870        while line < token.line or col < token.col:
 871            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 872                line += 1
 873                col = 1
 874            else:
 875                col += 1
 876            index += 1
 877
 878        return index
 879
 880    def _advance(self, times: int = 1) -> None:
 881        self._index += times
 882        self._curr = seq_get(self._tokens, self._index)
 883        self._next = seq_get(self._tokens, self._index + 1)
 884        if self._index > 0:
 885            self._prev = self._tokens[self._index - 1]
 886            self._prev_comments = self._prev.comments
 887        else:
 888            self._prev = None
 889            self._prev_comments = None
 890
    def _retreat(self, index: int) -> None:
        # Move the token cursor to absolute position `index`, reusing _advance's
        # bookkeeping (works for both backward and forward moves).
        self._advance(index - self._index)
 893
 894    def _parse_command(self) -> exp.Expression:
 895        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 896
 897    def _parse_statement(self) -> t.Optional[exp.Expression]:
 898        if self._curr is None:
 899            return None
 900
 901        if self._match_set(self.STATEMENT_PARSERS):
 902            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 903
 904        if self._match_set(Tokenizer.COMMANDS):
 905            return self._parse_command()
 906
 907        expression = self._parse_expression()
 908        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 909
 910        self._parse_query_modifiers(expression)
 911        return expression
 912
 913    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
 914        start = self._prev
 915        temporary = self._match(TokenType.TEMPORARY)
 916        materialized = self._match(TokenType.MATERIALIZED)
 917        kind = self._match_set(self.CREATABLES) and self._prev.text
 918        if not kind:
 919            if default_kind:
 920                kind = default_kind
 921            else:
 922                return self._parse_as_command(start)
 923
 924        return self.expression(
 925            exp.Drop,
 926            exists=self._parse_exists(),
 927            this=self._parse_table(schema=True),
 928            kind=kind,
 929            temporary=temporary,
 930            materialized=materialized,
 931            cascade=self._match(TokenType.CASCADE),
 932        )
 933
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Match an IF [NOT] EXISTS clause. The chained `and` deliberately
        # preserves the falsy value of the first failing match, so callers
        # store exactly what _match returned.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
 940
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parse a CREATE statement for tables, views, schemas, indexes, functions
        and procedures, collecting all leading modifiers and trailing options.

        Falls back to an opaque command when the created object kind cannot be
        determined. NOTE: every `_match*` call consumes tokens, so the order of
        these statements mirrors the grammar and must not be rearranged.
        """
        start = self._prev
        replace = self._match_pair(TokenType.OR, TokenType.REPLACE)
        set_ = self._match(TokenType.SET)  # Teradata
        multiset = self._match_text_seq("MULTISET")  # Teradata
        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
        volatile = self._match(TokenType.VOLATILE)  # Teradata
        temporary = self._match(TokenType.TEMPORARY)
        transient = self._match_text_seq("TRANSIENT")
        external = self._match_text_seq("EXTERNAL")
        unique = self._match(TokenType.UNIQUE)
        materialized = self._match(TokenType.MATERIALIZED)

        # CREATE TABLE FUNCTION: skip the TABLE keyword so FUNCTION is parsed below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # Properties may precede the object kind; retry after parsing them.
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        data = None
        statistics = None
        no_primary_index = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            # RETURN <stmt> wraps the body in an exp.Return node.
            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            if self._match(TokenType.COMMA):  # comma-separated properties before schema definition
                properties = self._parse_properties(before=True)

            this = self._parse_schema(this=table_parts)

            if not properties:  # properties after schema definition
                properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # WITH [NO] DATA / AND [NO] STATISTICS trailing options.
                if self._match_text_seq("WITH", "DATA"):
                    data = True
                elif self._match_text_seq("WITH", "NO", "DATA"):
                    data = False

                if self._match_text_seq("AND", "STATISTICS"):
                    statistics = True
                elif self._match_text_seq("AND", "NO", "STATISTICS"):
                    statistics = False

                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # post index PARTITION BY property
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        if properties:
                            properties.expressions.append(self._parse_property())
                        else:
                            properties = self._parse_properties()

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            set=set_,
            multiset=multiset,
            global_temporary=global_temporary,
            volatile=volatile,
            exists=exists,
            properties=properties,
            temporary=temporary,
            transient=transient,
            external=external,
            replace=replace,
            unique=unique,
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )
1064
1065    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1066        self._match(TokenType.COMMA)
1067
1068        # parsers look to _prev for no/dual/default, so need to consume first
1069        self._match_text_seq("NO")
1070        self._match_text_seq("DUAL")
1071        self._match_text_seq("DEFAULT")
1072
1073        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1074            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1075
1076        return None
1077
1078    def _parse_property(self) -> t.Optional[exp.Expression]:
1079        if self._match_texts(self.PROPERTY_PARSERS):
1080            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1081
1082        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1083            return self._parse_character_set(True)
1084
1085        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1086            return self._parse_sortkey(compound=True)
1087
1088        if self._match_text_seq("SQL", "SECURITY"):
1089            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1090
1091        assignment = self._match_pair(
1092            TokenType.VAR, TokenType.EQ, advance=False
1093        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1094
1095        if assignment:
1096            key = self._parse_var_or_string()
1097            self._match(TokenType.EQ)
1098            return self.expression(exp.Property, this=key, value=self._parse_column())
1099
1100        return None
1101
1102    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1103        self._match(TokenType.EQ)
1104        self._match(TokenType.ALIAS)
1105        return self.expression(
1106            exp_class,
1107            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1108        )
1109
1110    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1111        properties = []
1112
1113        while True:
1114            if before:
1115                identified_property = self._parse_property_before()
1116            else:
1117                identified_property = self._parse_property()
1118
1119            if not identified_property:
1120                break
1121            for p in ensure_collection(identified_property):
1122                properties.append(p)
1123
1124        if properties:
1125            return self.expression(exp.Properties, expressions=properties)
1126
1127        return None
1128
1129    def _parse_fallback(self, no=False) -> exp.Expression:
1130        self._match_text_seq("FALLBACK")
1131        return self.expression(
1132            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1133        )
1134
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # Dispatch the payload of a WITH property:
        #   WITH (prop, ...)      -> list of parsed properties
        #   WITH JOURNAL ...      -> journal table property
        #   anything else         -> isolated loading property
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()
1148
1149    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1150    def _parse_definer(self) -> t.Optional[exp.Expression]:
1151        self._match(TokenType.EQ)
1152
1153        user = self._parse_id_var()
1154        self._match(TokenType.PARAMETER)
1155        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1156
1157        if not user or not host:
1158            return None
1159
1160        return exp.DefinerProperty(this=f"{user}@{host}")
1161
1162    def _parse_withjournaltable(self) -> exp.Expression:
1163        self._match_text_seq("WITH", "JOURNAL", "TABLE")
1164        self._match(TokenType.EQ)
1165        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1166
    def _parse_log(self, no=False) -> exp.Expression:
        # Parse a [NO] LOG property; the NO modifier, if any, was consumed by
        # the caller and arrives via the `no` flag.
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1170
    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # Parse a [NO|DUAL] [BEFORE] JOURNAL property; NO/DUAL were consumed by
        # the caller and arrive via the flags.
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1175
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # Parse a [NOT] [LOCAL] AFTER JOURNAL property; NO/DUAL/LOCAL modifiers
        # arrive via the flags, optional NOT/LOCAL keywords are consumed here.
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1181
1182    def _parse_checksum(self) -> exp.Expression:
1183        self._match_text_seq("CHECKSUM")
1184        self._match(TokenType.EQ)
1185
1186        on = None
1187        if self._match(TokenType.ON):
1188            on = True
1189        elif self._match_text_seq("OFF"):
1190            on = False
1191        default = self._match(TokenType.DEFAULT)
1192
1193        return self.expression(
1194            exp.ChecksumProperty,
1195            on=on,
1196            default=default,
1197        )
1198
1199    def _parse_freespace(self) -> exp.Expression:
1200        self._match_text_seq("FREESPACE")
1201        self._match(TokenType.EQ)
1202        return self.expression(
1203            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1204        )
1205
1206    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1207        self._match_text_seq("MERGEBLOCKRATIO")
1208        if self._match(TokenType.EQ):
1209            return self.expression(
1210                exp.MergeBlockRatioProperty,
1211                this=self._parse_number(),
1212                percent=self._match(TokenType.PERCENT),
1213            )
1214        else:
1215            return self.expression(
1216                exp.MergeBlockRatioProperty,
1217                no=no,
1218                default=default,
1219            )
1220
1221    def _parse_datablocksize(self, default=None) -> exp.Expression:
1222        if default:
1223            self._match_text_seq("DATABLOCKSIZE")
1224            return self.expression(exp.DataBlocksizeProperty, default=True)
1225        elif self._match_texts(("MIN", "MINIMUM")):
1226            self._match_text_seq("DATABLOCKSIZE")
1227            return self.expression(exp.DataBlocksizeProperty, min=True)
1228        elif self._match_texts(("MAX", "MAXIMUM")):
1229            self._match_text_seq("DATABLOCKSIZE")
1230            return self.expression(exp.DataBlocksizeProperty, min=False)
1231
1232        self._match_text_seq("DATABLOCKSIZE")
1233        self._match(TokenType.EQ)
1234        size = self._parse_number()
1235        units = None
1236        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1237            units = self._prev.text
1238        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1239
1240    def _parse_blockcompression(self) -> exp.Expression:
1241        self._match_text_seq("BLOCKCOMPRESSION")
1242        self._match(TokenType.EQ)
1243        always = self._match(TokenType.ALWAYS)
1244        manual = self._match_text_seq("MANUAL")
1245        never = self._match_text_seq("NEVER")
1246        default = self._match_text_seq("DEFAULT")
1247        autotemp = None
1248        if self._match_text_seq("AUTOTEMP"):
1249            autotemp = self._parse_schema()
1250
1251        return self.expression(
1252            exp.BlockCompressionProperty,
1253            always=always,
1254            manual=manual,
1255            never=never,
1256            default=default,
1257            autotemp=autotemp,
1258        )
1259
    def _parse_withisolatedloading(self) -> exp.Expression:
        # Parse a WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]
        # property. Each match consumes its keyword(s), so the order below
        # mirrors the grammar.
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1276
1277    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1278        if self._match(TokenType.PARTITION_BY):
1279            return self._parse_csv(self._parse_conjunction)
1280        return []
1281
1282    def _parse_partitioned_by(self) -> exp.Expression:
1283        self._match(TokenType.EQ)
1284        return self.expression(
1285            exp.PartitionedByProperty,
1286            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1287        )
1288
1289    def _parse_distkey(self) -> exp.Expression:
1290        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1291
1292    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1293        table = self._parse_table(schema=True)
1294        options = []
1295        while self._match_texts(("INCLUDING", "EXCLUDING")):
1296            this = self._prev.text.upper()
1297            id_var = self._parse_id_var()
1298
1299            if not id_var:
1300                return None
1301
1302            options.append(
1303                self.expression(
1304                    exp.Property,
1305                    this=this,
1306                    value=exp.Var(this=id_var.this.upper()),
1307                )
1308            )
1309        return self.expression(exp.LikeProperty, this=table, expressions=options)
1310
1311    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1312        return self.expression(
1313            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1314        )
1315
1316    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1317        self._match(TokenType.EQ)
1318        return self.expression(
1319            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1320        )
1321
1322    def _parse_returns(self) -> exp.Expression:
1323        value: t.Optional[exp.Expression]
1324        is_table = self._match(TokenType.TABLE)
1325
1326        if is_table:
1327            if self._match(TokenType.LT):
1328                value = self.expression(
1329                    exp.Schema,
1330                    this="TABLE",
1331                    expressions=self._parse_csv(self._parse_struct_kwargs),
1332                )
1333                if not self._match(TokenType.GT):
1334                    self.raise_error("Expecting >")
1335            else:
1336                value = self._parse_schema(exp.Var(this="TABLE"))
1337        else:
1338            value = self._parse_types()
1339
1340        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1341
1342    def _parse_describe(self) -> exp.Expression:
1343        kind = self._match_set(self.CREATABLES) and self._prev.text
1344        this = self._parse_table()
1345
1346        return self.expression(exp.Describe, this=this, kind=kind)
1347
1348    def _parse_insert(self) -> exp.Expression:
1349        overwrite = self._match(TokenType.OVERWRITE)
1350        local = self._match(TokenType.LOCAL)
1351
1352        this: t.Optional[exp.Expression]
1353
1354        if self._match_text_seq("DIRECTORY"):
1355            this = self.expression(
1356                exp.Directory,
1357                this=self._parse_var_or_string(),
1358                local=local,
1359                row_format=self._parse_row_format(match_row=True),
1360            )
1361        else:
1362            self._match(TokenType.INTO)
1363            self._match(TokenType.TABLE)
1364            this = self._parse_table(schema=True)
1365
1366        return self.expression(
1367            exp.Insert,
1368            this=this,
1369            exists=self._parse_exists(),
1370            partition=self._parse_partition(),
1371            expression=self._parse_ddl_select(),
1372            overwrite=overwrite,
1373        )
1374
1375    def _parse_row(self) -> t.Optional[exp.Expression]:
1376        if not self._match(TokenType.FORMAT):
1377            return None
1378        return self._parse_row_format()
1379
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT clause in either its SERDE or DELIMITED form.

        Args:
            match_row: require a leading ROW FORMAT token pair; if absent, return None.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each optional sub-clause contributes one kwarg; order follows the grammar.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1405
1406    def _parse_load_data(self) -> exp.Expression:
1407        local = self._match(TokenType.LOCAL)
1408        self._match_text_seq("INPATH")
1409        inpath = self._parse_string()
1410        overwrite = self._match(TokenType.OVERWRITE)
1411        self._match_pair(TokenType.INTO, TokenType.TABLE)
1412
1413        return self.expression(
1414            exp.LoadData,
1415            this=self._parse_table(schema=True),
1416            local=local,
1417            overwrite=overwrite,
1418            inpath=inpath,
1419            partition=self._parse_partition(),
1420            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1421            serde=self._match_text_seq("SERDE") and self._parse_string(),
1422        )
1423
1424    def _parse_delete(self) -> exp.Expression:
1425        self._match(TokenType.FROM)
1426
1427        return self.expression(
1428            exp.Delete,
1429            this=self._parse_table(schema=True),
1430            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1431            where=self._parse_where(),
1432        )
1433
1434    def _parse_update(self) -> exp.Expression:
1435        return self.expression(
1436            exp.Update,
1437            **{  # type: ignore
1438                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1439                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1440                "from": self._parse_from(),
1441                "where": self._parse_where(),
1442            },
1443        )
1444
1445    def _parse_uncache(self) -> exp.Expression:
1446        if not self._match(TokenType.TABLE):
1447            self.raise_error("Expecting TABLE after UNCACHE")
1448
1449        return self.expression(
1450            exp.Uncache,
1451            exists=self._parse_exists(),
1452            this=self._parse_table(schema=True),
1453        )
1454
1455    def _parse_cache(self) -> exp.Expression:
1456        lazy = self._match(TokenType.LAZY)
1457        self._match(TokenType.TABLE)
1458        table = self._parse_table(schema=True)
1459        options = []
1460
1461        if self._match(TokenType.OPTIONS):
1462            self._match_l_paren()
1463            k = self._parse_string()
1464            self._match(TokenType.EQ)
1465            v = self._parse_string()
1466            options = [k, v]
1467            self._match_r_paren()
1468
1469        self._match(TokenType.ALIAS)
1470        return self.expression(
1471            exp.Cache,
1472            this=table,
1473            lazy=lazy,
1474            options=options,
1475            expression=self._parse_select(nested=True),
1476        )
1477
1478    def _parse_partition(self) -> t.Optional[exp.Expression]:
1479        if not self._match(TokenType.PARTITION):
1480            return None
1481
1482        return self.expression(
1483            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1484        )
1485
1486    def _parse_value(self) -> exp.Expression:
1487        if self._match(TokenType.L_PAREN):
1488            expressions = self._parse_csv(self._parse_conjunction)
1489            self._match_r_paren()
1490            return self.expression(exp.Tuple, expressions=expressions)
1491
1492        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1493        # Source: https://prestodb.io/docs/current/sql/values.html
1494        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1495
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a SELECT, a
        parenthesized subquery/table (when `nested`/`table`), or a VALUES list;
        then fold in any trailing set operations.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the CTE to the parsed statement if it supports one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1569
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        Args:
            skip_with_token: if True, assume the WITH keyword was already consumed.

        Returns:
            An exp.With node, or None when there is no WITH clause.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue only when a COMMA or a repeated WITH introduces another
            # CTE; the extra match in the else branch tolerates a redundant
            # WITH keyword following the comma.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1586
    def _parse_cte(self) -> exp.Expression:
        """Parse one common table expression: `alias [(cols)] [AS] (statement)`.

        Raises a parse error when the CTE has no alias name.
        """
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        # The AS keyword between the alias and the parenthesized body is optional.
        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )
1599
1600    def _parse_table_alias(
1601        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1602    ) -> t.Optional[exp.Expression]:
1603        any_token = self._match(TokenType.ALIAS)
1604        alias = self._parse_id_var(
1605            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1606        )
1607        index = self._index
1608
1609        if self._match(TokenType.L_PAREN):
1610            columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
1611            self._match_r_paren() if columns else self._retreat(index)
1612        else:
1613            columns = None
1614
1615        if not alias and not columns:
1616            return None
1617
1618        return self.expression(exp.TableAlias, this=alias, columns=columns)
1619
1620    def _parse_subquery(
1621        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1622    ) -> exp.Expression:
1623        return self.expression(
1624            exp.Subquery,
1625            this=this,
1626            pivots=self._parse_pivots(),
1627            alias=self._parse_table_alias() if parse_alias else None,
1628        )
1629
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing query modifiers (laterals, joins, WHERE, GROUP BY, ...)
        to `this` in place.

        Only nodes whose type is in `self.MODIFIABLES` are modified; anything
        else is returned to the caller untouched.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        # First consume any number of laterals, joins and comma-separated tables.
        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Commas are only treated as table separators for non-table nodes.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # NOTE(review): assumes a FROM clause already exists in
                # this.args when a comma-separated table appears — confirm.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # Then run each registered modifier parser (WHERE, GROUP BY, ORDER BY, ...)
        # and attach whatever it produced.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1654
1655    def _parse_hint(self) -> t.Optional[exp.Expression]:
1656        if self._match(TokenType.HINT):
1657            hints = self._parse_csv(self._parse_function)
1658            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1659                self.raise_error("Expected */ after HINT")
1660            return self.expression(exp.Hint, expressions=hints)
1661
1662        return None
1663
    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] target` into an `exp.Into`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        # The TABLE keyword is optional.
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
1675
1676    def _parse_from(self) -> t.Optional[exp.Expression]:
1677        if not self._match(TokenType.FROM):
1678            return None
1679
1680        return self.expression(
1681            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1682        )
1683
1684    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1685        if not self._match(TokenType.MATCH_RECOGNIZE):
1686            return None
1687        self._match_l_paren()
1688
1689        partition = self._parse_partition_by()
1690        order = self._parse_order()
1691        measures = (
1692            self._parse_alias(self._parse_conjunction())
1693            if self._match_text_seq("MEASURES")
1694            else None
1695        )
1696
1697        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1698            rows = exp.Var(this="ONE ROW PER MATCH")
1699        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1700            text = "ALL ROWS PER MATCH"
1701            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1702                text += f" SHOW EMPTY MATCHES"
1703            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1704                text += f" OMIT EMPTY MATCHES"
1705            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1706                text += f" WITH UNMATCHED ROWS"
1707            rows = exp.Var(this=text)
1708        else:
1709            rows = None
1710
1711        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1712            text = "AFTER MATCH SKIP"
1713            if self._match_text_seq("PAST", "LAST", "ROW"):
1714                text += f" PAST LAST ROW"
1715            elif self._match_text_seq("TO", "NEXT", "ROW"):
1716                text += f" TO NEXT ROW"
1717            elif self._match_text_seq("TO", "FIRST"):
1718                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1719            elif self._match_text_seq("TO", "LAST"):
1720                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1721            after = exp.Var(this=text)
1722        else:
1723            after = None
1724
1725        if self._match_text_seq("PATTERN"):
1726            self._match_l_paren()
1727
1728            if not self._curr:
1729                self.raise_error("Expecting )", self._curr)
1730
1731            paren = 1
1732            start = self._curr
1733
1734            while self._curr and paren > 0:
1735                if self._curr.token_type == TokenType.L_PAREN:
1736                    paren += 1
1737                if self._curr.token_type == TokenType.R_PAREN:
1738                    paren -= 1
1739                end = self._prev
1740                self._advance()
1741            if paren > 0:
1742                self.raise_error("Expecting )", self._curr)
1743            pattern = exp.Var(this=self._find_sql(start, end))
1744        else:
1745            pattern = None
1746
1747        define = (
1748            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1749        )
1750        self._match_r_paren()
1751
1752        return self.expression(
1753            exp.MatchRecognize,
1754            partition_by=partition,
1755            order=order,
1756            measures=measures,
1757            rows=rows,
1758            after=after,
1759            pattern=pattern,
1760            define=define,
1761        )
1762
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY constructs.

        Returns an `exp.Lateral` for LATERAL, an `exp.Join` wrapping the
        lateral for the APPLY variants (OUTER APPLY gets side "LEFT"), or
        None when none of these keywords is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function or identifier
            # chain, e.g. LATERAL db.udtf(...)
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: `name [AS col1, col2, ...]`
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY variants are represented as joins; CROSS APPLY has no side.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1807
1808    def _parse_join_side_and_kind(
1809        self,
1810    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1811        return (
1812            self._match(TokenType.NATURAL) and self._prev,
1813            self._match_set(self.JOIN_SIDES) and self._prev,
1814            self._match_set(self.JOIN_KINDS) and self._prev,
1815        )
1816
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (optional NATURAL/side/kind prefix, joined table,
        optional ON/USING condition) into an `exp.Join`.

        Args:
            skip_join_token: if True, do not require the JOIN keyword itself.

        Returns:
            An `exp.Join`, or None when no JOIN keyword follows the prefix.
        """
        natural, side, kind = self._parse_join_side_and_kind()

        # NOTE(review): if prefix tokens were consumed but JOIN is absent, the
        # prefix is not rewound here — confirm callers tolerate this.
        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
1840
    def _parse_index(self) -> exp.Expression:
        """Parse `index_name ON [TABLE] table_name <expression>` into an `exp.Index`."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )
1852
    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index definition inside CREATE TABLE:
        `[UNIQUE] [PRIMARY] [AMP] INDEX name [(columns)]`.

        Returns None when the INDEX keyword is missing; any UNIQUE/PRIMARY/AMP
        tokens already consumed are not rewound in that case.
        """
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        # Peek (advance=False) so the column list parser consumes the paren itself.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )
1871
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a dotted table reference `[catalog.][db.]table[.more...]` into
        an `exp.Table`, including any trailing pivots.

        Args:
            schema: when True, do not try to parse the first part as a function.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: old table becomes db, old db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
1892
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, VALUES, subquery, or a plain
        table reference with optional alias, hints and TABLESAMPLE.

        Args:
            schema: parse the reference as a schema object (column defs allowed).
            alias_tokens: token types accepted as alias identifiers.
        """
        # Each alternative below consumes nothing when it does not apply,
        # so they can be tried in order.
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect switch: some dialects place TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # Table hints: WITH (hint1, hint2, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # TABLESAMPLE wraps the table node when present.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
1944
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse `UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] x]`
        into an `exp.Unnest`; None when the UNNEST keyword is absent."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialect switch: the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
1971
    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse a VALUES expression, optionally parenthesized as a derived
        table (`(VALUES ...) alias`), into an `exp.Values`."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        # Only the derived form has a wrapping paren to close.
        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
1983
1984    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
1985        if not self._match(TokenType.TABLE_SAMPLE):
1986            return None
1987
1988        method = self._parse_var()
1989        bucket_numerator = None
1990        bucket_denominator = None
1991        bucket_field = None
1992        percent = None
1993        rows = None
1994        size = None
1995        seed = None
1996
1997        self._match_l_paren()
1998
1999        if self._match(TokenType.BUCKET):
2000            bucket_numerator = self._parse_number()
2001            self._match(TokenType.OUT_OF)
2002            bucket_denominator = bucket_denominator = self._parse_number()
2003            self._match(TokenType.ON)
2004            bucket_field = self._parse_field()
2005        else:
2006            num = self._parse_number()
2007
2008            if self._match(TokenType.PERCENT):
2009                percent = num
2010            elif self._match(TokenType.ROWS):
2011                rows = num
2012            else:
2013                size = num
2014
2015        self._match_r_paren()
2016
2017        if self._match(TokenType.SEED):
2018            seed = self._parse_wrapped(self._parse_number)
2019
2020        return self.expression(
2021            exp.TableSample,
2022            method=method,
2023            bucket_numerator=bucket_numerator,
2024            bucket_denominator=bucket_denominator,
2025            bucket_field=bucket_field,
2026            percent=percent,
2027            rows=rows,
2028            size=size,
2029            seed=seed,
2030        )
2031
2032    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2033        return list(iter(self._parse_pivot, None))
2034
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one `PIVOT(... FOR col IN (...))` or `UNPIVOT(...)` clause into
        an `exp.Pivot`; None (with the tokens rewound) when it does not apply."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT/UNPIVOT must be followed by a paren; otherwise rewind so the
        # keyword can be re-interpreted (e.g. as an identifier).
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists columns; PIVOT lists aggregate functions (with aliases).
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # Reuse the IN parser for the `FOR col IN (...)` part.
        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2070
2071    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2072        if not skip_where_token and not self._match(TokenType.WHERE):
2073            return None
2074
2075        return self.expression(
2076            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2077        )
2078
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, CUBE and ROLLUP
        extensions, into an `exp.Group`.

        Args:
            skip_group_by_token: if True, assume GROUP BY was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        expressions = self._parse_csv(self._parse_conjunction)
        grouping_sets = self._parse_grouping_sets()

        # CUBE/ROLLUP come either as `WITH CUBE` / `WITH ROLLUP` (flag form,
        # stored as the truthy `with_`) or as `CUBE (cols)` / `ROLLUP (cols)`.
        self._match(TokenType.COMMA)
        with_ = self._match(TokenType.WITH)
        cube = self._match(TokenType.CUBE) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        self._match(TokenType.COMMA)
        rollup = self._match(TokenType.ROLLUP) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        return self.expression(
            exp.Group,
            expressions=expressions,
            grouping_sets=grouping_sets,
            cube=cube,
            rollup=rollup,
        )
2104
2105    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2106        if not self._match(TokenType.GROUPING_SETS):
2107            return None
2108
2109        return self._parse_wrapped_csv(self._parse_grouping_set)
2110
2111    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2112        if self._match(TokenType.L_PAREN):
2113            grouping_set = self._parse_csv(self._parse_column)
2114            self._match_r_paren()
2115            return self.expression(exp.Tuple, expressions=grouping_set)
2116
2117        return self._parse_column()
2118
2119    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2120        if not skip_having_token and not self._match(TokenType.HAVING):
2121            return None
2122        return self.expression(exp.Having, this=self._parse_conjunction())
2123
2124    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2125        if not self._match(TokenType.QUALIFY):
2126            return None
2127        return self.expression(exp.Qualify, this=self._parse_conjunction())
2128
2129    def _parse_order(
2130        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2131    ) -> t.Optional[exp.Expression]:
2132        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2133            return this
2134
2135        return self.expression(
2136            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2137        )
2138
2139    def _parse_sort(
2140        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2141    ) -> t.Optional[exp.Expression]:
2142        if not self._match(token_type):
2143            return None
2144        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2145
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item: expression plus optional ASC/DESC and
        NULLS FIRST/LAST, normalizing against the dialect's null ordering."""
        this = self._parse_conjunction()
        # ASC is consumed but has no effect (it is the default).
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When the query does not spell out a null ordering, derive it from the
        # dialect default (`self.null_ordering`): small nulls sort first under
        # ASC, large nulls sort first under DESC — unless nulls always go last.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2167
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, or a
        `FETCH [FIRST|NEXT] n [ROW|ROWS] [ONLY]` clause.

        Returns an `exp.Limit` or `exp.Fetch` wrapping `this`, or `this`
        unchanged when neither clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may parenthesize its argument: TOP (n)
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            # FIRST is the default direction when none is given.
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this
2191
2192    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2193        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2194            return this
2195
2196        count = self._parse_number()
2197        self._match_set((TokenType.ROW, TokenType.ROWS))
2198        return self.expression(exp.Offset, this=this, expression=count)
2199
2200    def _parse_lock(self) -> t.Optional[exp.Expression]:
2201        if self._match_text_seq("FOR", "UPDATE"):
2202            return self.expression(exp.Lock, update=True)
2203        if self._match_text_seq("FOR", "SHARE"):
2204            return self.expression(exp.Lock, update=False)
2205
2206        return None
2207
2208    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2209        if not self._match_set(self.SET_OPERATIONS):
2210            return this
2211
2212        token_type = self._prev.token_type
2213
2214        if token_type == TokenType.UNION:
2215            expression = exp.Union
2216        elif token_type == TokenType.EXCEPT:
2217            expression = exp.Except
2218        else:
2219            expression = exp.Intersect
2220
2221        return self.expression(
2222            expression,
2223            this=this,
2224            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2225            expression=self._parse_set_operations(self._parse_select(nested=True)),
2226        )
2227
2228    def _parse_expression(self) -> t.Optional[exp.Expression]:
2229        return self._parse_alias(self._parse_conjunction())
2230
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse conjunction-level operators (self.CONJUNCTION) over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2233
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (self.EQUALITY) over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2236
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (self.COMPARISON) over ranges."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2239
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: BETWEEN/IN/LIKE-style operators
        (via RANGE_PARSERS), ISNULL/NOTNULL shorthands, NOT negation, and IS."""
        this = self._parse_bitwise()
        # A leading NOT here negates the range predicate (e.g. NOT BETWEEN).
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2262
    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IS: `[NOT] DISTINCT FROM expr` becomes a
        null-safe (in)equality; otherwise `[NOT] NULL/TRUE/FALSE` becomes an
        `exp.Is` (wrapped in `exp.Not` when negated)."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM <=> null-safe equals; IS DISTINCT FROM <=> null-safe not-equals.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this
2275
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: an UNNEST call, a parenthesized
        list/subquery, or a bare field, into an `exp.In`."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subqueryable item is stored as `query`, not `expressions`.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2293
    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `low AND high` (the BETWEEN keyword was already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
2299
2300    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2301        if not self._match(TokenType.ESCAPE):
2302            return this
2303        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2304
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators (self.BITWISE) over terms, including
        the `<<`/`>>` shift operators which arrive as paired LT/GT tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2327
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse term-level operators (self.TERM) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2330
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse factor-level operators (self.FACTOR) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2333
2334    def _parse_unary(self) -> t.Optional[exp.Expression]:
2335        if self._match_set(self.UNARY_PARSERS):
2336            return self.UNARY_PARSERS[self._prev.token_type](self)
2337        return self._parse_at_time_zone(self._parse_type())
2338
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, a type-prefixed cast (`TYPE expr`), a bare
        data type, or fall back to a plain column expression."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # `TYPE expr` is treated as a cast of expr to TYPE.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type with no args and no following expression was
                # probably an identifier after all — rewind and reparse.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2356
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, e.g. INT, VARCHAR(10), ARRAY<INT>, STRUCT<a: INT>.

        Args:
            check_func: if True and the parsed `TYPE(...)` could equally be a
                function call, backtrack and return None unless a string
                literal follows (which disambiguates it as a type).

        Returns:
            The parsed type expression, or None if no type starts here (the
            token position is restored in that case).
        """
        index = self._index  # saved so we can backtrack if this isn't a type

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False  # True when `TYPE(...)` might actually be a function call

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a parameterized type after all.
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # Postfix `[]` array syntax, e.g. INT[] or INT[][].
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` (no matching `]` pair) means this isn't a type expression.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic syntax for nested types, e.g. ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Resolving a time-zone qualifier rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat the parse as a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
        )
2462
2463    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2464        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2465            return self._parse_types()
2466
2467        this = self._parse_id_var()
2468        self._match(TokenType.COLON)
2469        data_type = self._parse_types()
2470
2471        if not data_type:
2472            return None
2473        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2474
2475    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2476        if not self._match(TokenType.AT_TIME_ZONE):
2477            return this
2478        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2479
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted paths, `::` casts and
        trailing bracket subscripts.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Operators with a handler take the next raw token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.table:
                # A second identifier means the earlier one was really the table.
                this = self.expression(exp.Column, this=field, table=this.this)
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2522
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple. Returns None otherwise.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers written without a leading zero, e.g. `.25`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                # A parenthesized query may participate in UNION/INTERSECT/etc.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                # Re-attach comments captured at the opening paren.
                this.comments = comments

            return this

        return None
2569
2570    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2571        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2572
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position.

        Args:
            functions: name -> builder mapping to use instead of self.FUNCTIONS.

        Returns:
            The parsed function expression (possibly wrapped in a window), or
            None if the current tokens don't start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs with dedicated parsers that take no parentheses.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) — the argument is a full query.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: preserve it as an Anonymous call node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2634
2635    def _parse_user_defined_function(
2636        self, kind: t.Optional[TokenType] = None
2637    ) -> t.Optional[exp.Expression]:
2638        this = self._parse_id_var()
2639
2640        while self._match(TokenType.DOT):
2641            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2642
2643        if not self._match(TokenType.L_PAREN):
2644            return this
2645
2646        expressions = self._parse_csv(self._parse_udf_kwarg)
2647        self._match_r_paren()
2648        return self.expression(
2649            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2650        )
2651
2652    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2653        literal = self._parse_primary()
2654        if literal:
2655            return self.expression(exp.Introducer, this=token.text, expression=literal)
2656
2657        return self.expression(exp.Identifier, this=token.text)
2658
2659    def _parse_national(self, token: Token) -> exp.Expression:
2660        return self.expression(exp.National, this=exp.Literal.string(token.text))
2661
2662    def _parse_session_parameter(self) -> exp.Expression:
2663        kind = None
2664        this = self._parse_id_var() or self._parse_primary()
2665
2666        if this and self._match(TokenType.DOT):
2667            kind = this.name
2668            this = self._parse_var() or self._parse_primary()
2669
2670        return self.expression(exp.SessionParameter, this=this, kind=kind)
2671
2672    def _parse_udf_kwarg(self) -> t.Optional[exp.Expression]:
2673        this = self._parse_id_var()
2674        kind = self._parse_types()
2675
2676        if not kind:
2677            return this
2678
2679        return self.expression(exp.UserDefinedFunctionKwarg, this=this, kind=kind)
2680
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument, e.g. `x -> x + 1` or `(a, b) -> ...`.

        If no lambda arrow is found, backtrack and parse a regular argument
        expression with optional DISTINCT, IGNORE/RESPECT NULLS, ORDER and LIMIT.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Unbalanced paren: these were not lambda parameters.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse an ordinary expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is consumed without producing a node.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2712
2713    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2714        index = self._index
2715        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2716            self._retreat(index)
2717            return this
2718
2719        args = self._parse_csv(
2720            lambda: self._parse_constraint()
2721            or self._parse_column_def(self._parse_field(any_token=True))
2722        )
2723        self._match_r_paren()
2724        return self.expression(exp.Schema, this=this, expressions=args)
2725
2726    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2727        kind = self._parse_types()
2728
2729        constraints = []
2730        while True:
2731            constraint = self._parse_column_constraint()
2732            if not constraint:
2733                break
2734            constraints.append(constraint)
2735
2736        if not kind and not constraints:
2737            return this
2738
2739        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2740
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single column constraint (NOT NULL, DEFAULT, PRIMARY KEY, ...).

        Returns a ColumnConstraint (or a Reference for REFERENCES clauses), or
        None when no constraint starts at the current position.
        """
        this = self._parse_references()

        if this:
            return this

        # Optional `CONSTRAINT <name>` prefix naming the constraint.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        kind: exp.Expression

        if self._match_set((TokenType.AUTO_INCREMENT, TokenType.IDENTITY)):
            start = None
            increment = None

            if self._match(TokenType.L_PAREN, advance=False):
                # e.g. IDENTITY(1, 1) — wrapped (start, increment) arguments.
                args = self._parse_wrapped_csv(self._parse_bitwise)
                start = seq_get(args, 0)
                increment = seq_get(args, 1)
            elif self._match_text_seq("START"):
                start = self._parse_bitwise()
                self._match_text_seq("INCREMENT")
                increment = self._parse_bitwise()

            if start and increment:
                kind = exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
            else:
                kind = exp.AutoIncrementColumnConstraint()
        elif self._match(TokenType.CHECK):
            constraint = self._parse_wrapped(self._parse_conjunction)
            kind = self.expression(exp.CheckColumnConstraint, this=constraint)
        elif self._match(TokenType.COLLATE):
            kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.ENCODE):
            kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.DEFAULT):
            kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_bitwise())
        elif self._match_pair(TokenType.NOT, TokenType.NULL):
            kind = exp.NotNullColumnConstraint()
        elif self._match(TokenType.NULL):
            # An explicit NULL constraint is modeled as NOT NULL with allow_null set.
            kind = exp.NotNullColumnConstraint(allow_null=True)
        elif self._match(TokenType.SCHEMA_COMMENT):
            kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string())
        elif self._match(TokenType.PRIMARY_KEY):
            desc = None
            if self._match(TokenType.ASC) or self._match(TokenType.DESC):
                desc = self._prev.token_type == TokenType.DESC
            kind = exp.PrimaryKeyColumnConstraint(desc=desc)
        elif self._match(TokenType.UNIQUE):
            kind = exp.UniqueColumnConstraint()
        elif self._match(TokenType.GENERATED):
            # GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [(START WITH n INCREMENT BY m)]
            if self._match(TokenType.BY_DEFAULT):
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
            else:
                self._match(TokenType.ALWAYS)
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
            self._match_pair(TokenType.ALIAS, TokenType.IDENTITY)

            if self._match(TokenType.L_PAREN):
                if self._match_text_seq("START", "WITH"):
                    kind.set("start", self._parse_bitwise())
                if self._match_text_seq("INCREMENT", "BY"):
                    kind.set("increment", self._parse_bitwise())

                self._match_r_paren()
        else:
            return this

        return self.expression(exp.ColumnConstraint, this=this, kind=kind)
2810
2811    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2812        if not self._match(TokenType.CONSTRAINT):
2813            return self._parse_unnamed_constraint()
2814
2815        this = self._parse_id_var()
2816        expressions = []
2817
2818        while True:
2819            constraint = self._parse_unnamed_constraint() or self._parse_function()
2820            if not constraint:
2821                break
2822            expressions.append(constraint)
2823
2824        return self.expression(exp.Constraint, this=this, expressions=expressions)
2825
2826    def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]:
2827        if not self._match_set(self.CONSTRAINT_PARSERS):
2828            return None
2829        return self.CONSTRAINT_PARSERS[self._prev.token_type](self)
2830
2831    def _parse_unique(self) -> exp.Expression:
2832        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
2833
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ... actions, NOT ENFORCED,
        DEFERRABLE, etc.) as raw SQL strings, stopping at the first non-option.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # token after ON (e.g. DELETE/UPDATE)

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
2870
2871    def _parse_references(self) -> t.Optional[exp.Expression]:
2872        if not self._match(TokenType.REFERENCES):
2873            return None
2874
2875        expressions = None
2876        this = self._parse_id_var()
2877
2878        if self._match(TokenType.L_PAREN, advance=False):
2879            expressions = self._parse_wrapped_id_vars()
2880
2881        options = self._parse_key_constraint_options()
2882        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
2883
2884    def _parse_foreign_key(self) -> exp.Expression:
2885        expressions = self._parse_wrapped_id_vars()
2886        reference = self._parse_references()
2887        options = {}
2888
2889        while self._match(TokenType.ON):
2890            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
2891                self.raise_error("Expected DELETE or UPDATE")
2892
2893            kind = self._prev.text.lower()
2894
2895            if self._match(TokenType.NO_ACTION):
2896                action = "NO ACTION"
2897            elif self._match(TokenType.SET):
2898                self._match_set((TokenType.NULL, TokenType.DEFAULT))
2899                action = "SET " + self._prev.text.upper()
2900            else:
2901                self._advance()
2902                action = self._prev.text.upper()
2903
2904            options[kind] = action
2905
2906        return self.expression(
2907            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
2908        )
2909
2910    def _parse_primary_key(self) -> exp.Expression:
2911        expressions = self._parse_wrapped_id_vars()
2912        options = self._parse_key_constraint_options()
2913        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
2914
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `[...]` subscript / array or `{...}` struct literal on `this`.

        Returns `this` unchanged when no opening bracket/brace follows; recurses
        so chained brackets like `x[0][1]` are handled.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no start, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Adjust literal indexes for dialects whose arrays aren't 0-based.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)
2943
2944    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2945        if self._match(TokenType.COLON):
2946            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
2947        return this
2948
2949    def _parse_case(self) -> t.Optional[exp.Expression]:
2950        ifs = []
2951        default = None
2952
2953        expression = self._parse_conjunction()
2954
2955        while self._match(TokenType.WHEN):
2956            this = self._parse_conjunction()
2957            self._match(TokenType.THEN)
2958            then = self._parse_conjunction()
2959            ifs.append(self.expression(exp.If, this=this, true=then))
2960
2961        if self._match(TokenType.ELSE):
2962            default = self._parse_conjunction()
2963
2964        if not self._match(TokenType.END):
2965            self.raise_error("Expected END after CASE", self._prev)
2966
2967        return self._parse_window(
2968            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
2969        )
2970
2971    def _parse_if(self) -> t.Optional[exp.Expression]:
2972        if self._match(TokenType.L_PAREN):
2973            args = self._parse_csv(self._parse_conjunction)
2974            this = exp.If.from_arg_list(args)
2975            self.validate_expression(this, args)
2976            self._match_r_paren()
2977        else:
2978            condition = self._parse_conjunction()
2979            self._match(TokenType.THEN)
2980            true = self._parse_conjunction()
2981            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
2982            self._match(TokenType.END)
2983            this = self.expression(exp.If, this=condition, true=true, false=false)
2984
2985        return self._parse_window(this)
2986
2987    def _parse_extract(self) -> exp.Expression:
2988        this = self._parse_function() or self._parse_var() or self._parse_type()
2989
2990        if self._match(TokenType.FROM):
2991            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
2992
2993        if not self._match(TokenType.COMMA):
2994            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
2995
2996        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
2997
2998    def _parse_cast(self, strict: bool) -> exp.Expression:
2999        this = self._parse_conjunction()
3000
3001        if not self._match(TokenType.ALIAS):
3002            self.raise_error("Expected AS after CAST")
3003
3004        to = self._parse_types()
3005
3006        if not to:
3007            self.raise_error("Expected TYPE after CAST")
3008        elif to.this == exp.DataType.Type.CHAR:
3009            if self._match(TokenType.CHARACTER_SET):
3010                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3011
3012        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3013
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into a GroupConcat node,
        handling the Postgres ORDER BY form and the WITHIN GROUP syntax.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index  # remember position to backtrack after peeking past `)`
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3042
3043    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3044        to: t.Optional[exp.Expression]
3045        this = self._parse_column()
3046
3047        if self._match(TokenType.USING):
3048            to = self.expression(exp.CharacterSet, this=self._parse_var())
3049        elif self._match(TokenType.COMMA):
3050            to = self._parse_types()
3051        else:
3052            to = None
3053
3054        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3055
3056    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3057        args = self._parse_csv(self._parse_bitwise)
3058
3059        if self._match(TokenType.IN):
3060            return self.expression(
3061                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3062            )
3063
3064        if haystack_first:
3065            haystack = seq_get(args, 0)
3066            needle = seq_get(args, 1)
3067        else:
3068            needle = seq_get(args, 0)
3069            haystack = seq_get(args, 1)
3070
3071        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3072
3073        self.validate_expression(this, args)
3074
3075        return this
3076
3077    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3078        args = self._parse_csv(self._parse_table)
3079        return exp.JoinHint(this=func_name.upper(), expressions=args)
3080
3081    def _parse_substring(self) -> exp.Expression:
3082        # Postgres supports the form: substring(string [from int] [for int])
3083        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3084
3085        args = self._parse_csv(self._parse_bitwise)
3086
3087        if self._match(TokenType.FROM):
3088            args.append(self._parse_bitwise())
3089            if self._match(TokenType.FOR):
3090                args.append(self._parse_bitwise())
3091
3092        this = exp.Substring.from_arg_list(args)
3093        self.validate_expression(this, args)
3094
3095        return this
3096
3097    def _parse_trim(self) -> exp.Expression:
3098        # https://www.w3resource.com/sql/character-functions/trim.php
3099        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3100
3101        position = None
3102        collation = None
3103
3104        if self._match_set(self.TRIM_TYPES):
3105            position = self._prev.text.upper()
3106
3107        expression = self._parse_term()
3108        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3109            this = self._parse_term()
3110        else:
3111            this = expression
3112            expression = None
3113
3114        if self._match(TokenType.COLLATE):
3115            collation = self._parse_term()
3116
3117        return self.expression(
3118            exp.Trim,
3119            this=this,
3120            position=position,
3121            expression=expression,
3122            collation=collation,
3123        )
3124
3125    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3126        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3127
3128    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3129        return self._parse_window(self._parse_id_var(), alias=True)
3130
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function modifiers around `this`: FILTER (...), WITHIN
        GROUP (...), IGNORE/RESPECT NULLS, then an OVER clause (or, when
        `alias` is True, a named-window body) with an optional frame spec.

        Args:
            this: the expression (typically a function call) being windowed.
            alias: True when parsing a named window from a WINDOW clause, in
                which case AS is consumed instead of requiring OVER.

        Returns:
            `this` unchanged when no OVER clause follows, otherwise the
            wrapping exp.Window (or intermediate modifier) expression.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        # OVER without parens references a window named elsewhere
        if not self._match(TokenType.L_PAREN):
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # frame spec: ROWS|RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3204
3205    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3206        self._match(TokenType.BETWEEN)
3207
3208        return {
3209            "value": (
3210                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3211            )
3212            or self._parse_bitwise(),
3213            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3214        }
3215
3216    def _parse_alias(
3217        self, this: t.Optional[exp.Expression], explicit: bool = False
3218    ) -> t.Optional[exp.Expression]:
3219        any_token = self._match(TokenType.ALIAS)
3220
3221        if explicit and not any_token:
3222            return this
3223
3224        if self._match(TokenType.L_PAREN):
3225            aliases = self.expression(
3226                exp.Aliases,
3227                this=this,
3228                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3229            )
3230            self._match_r_paren(aliases)
3231            return aliases
3232
3233        alias = self._parse_id_var(any_token)
3234
3235        if alias:
3236            return self.expression(exp.Alias, this=this, alias=alias)
3237
3238        return this
3239
3240    def _parse_id_var(
3241        self,
3242        any_token: bool = True,
3243        tokens: t.Optional[t.Collection[TokenType]] = None,
3244        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3245    ) -> t.Optional[exp.Expression]:
3246        identifier = self._parse_identifier()
3247
3248        if identifier:
3249            return identifier
3250
3251        prefix = ""
3252
3253        if prefix_tokens:
3254            while self._match_set(prefix_tokens):
3255                prefix += self._prev.text
3256
3257        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3258            quoted = self._prev.token_type == TokenType.STRING
3259            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3260
3261        return None
3262
3263    def _parse_string(self) -> t.Optional[exp.Expression]:
3264        if self._match(TokenType.STRING):
3265            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3266        return self._parse_placeholder()
3267
3268    def _parse_number(self) -> t.Optional[exp.Expression]:
3269        if self._match(TokenType.NUMBER):
3270            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3271        return self._parse_placeholder()
3272
3273    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3274        if self._match(TokenType.IDENTIFIER):
3275            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3276        return self._parse_placeholder()
3277
3278    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3279        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3280            return self.expression(exp.Var, this=self._prev.text)
3281        return self._parse_placeholder()
3282
3283    def _advance_any(self) -> t.Optional[Token]:
3284        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3285            self._advance()
3286            return self._prev
3287        return None
3288
3289    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3290        return self._parse_var() or self._parse_string()
3291
3292    def _parse_null(self) -> t.Optional[exp.Expression]:
3293        if self._match(TokenType.NULL):
3294            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3295        return None
3296
3297    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3298        if self._match(TokenType.TRUE):
3299            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3300        if self._match(TokenType.FALSE):
3301            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3302        return None
3303
3304    def _parse_star(self) -> t.Optional[exp.Expression]:
3305        if self._match(TokenType.STAR):
3306            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3307        return None
3308
3309    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3310        if self._match_set(self.PLACEHOLDER_PARSERS):
3311            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3312            if placeholder:
3313                return placeholder
3314            self._advance(-1)
3315        return None
3316
3317    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3318        if not self._match(TokenType.EXCEPT):
3319            return None
3320        if self._match(TokenType.L_PAREN, advance=False):
3321            return self._parse_wrapped_csv(self._parse_column)
3322        return self._parse_csv(self._parse_column)
3323
3324    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3325        if not self._match(TokenType.REPLACE):
3326            return None
3327        if self._match(TokenType.L_PAREN, advance=False):
3328            return self._parse_wrapped_csv(self._parse_expression)
3329        return self._parse_csv(self._parse_expression)
3330
3331    def _parse_csv(
3332        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3333    ) -> t.List[t.Optional[exp.Expression]]:
3334        parse_result = parse_method()
3335        items = [parse_result] if parse_result is not None else []
3336
3337        while self._match(sep):
3338            if parse_result and self._prev_comments:
3339                parse_result.comments = self._prev_comments
3340
3341            parse_result = parse_method()
3342            if parse_result is not None:
3343                items.append(parse_result)
3344
3345        return items
3346
3347    def _parse_tokens(
3348        self, parse_method: t.Callable, expressions: t.Dict
3349    ) -> t.Optional[exp.Expression]:
3350        this = parse_method()
3351
3352        while self._match_set(expressions):
3353            this = self.expression(
3354                expressions[self._prev.token_type],
3355                this=this,
3356                comments=self._prev_comments,
3357                expression=parse_method(),
3358            )
3359
3360        return this
3361
3362    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3363        return self._parse_wrapped_csv(self._parse_id_var)
3364
3365    def _parse_wrapped_csv(
3366        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3367    ) -> t.List[t.Optional[exp.Expression]]:
3368        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3369
3370    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3371        self._match_l_paren()
3372        parse_result = parse_method()
3373        self._match_r_paren()
3374        return parse_result
3375
3376    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3377        return self._parse_select() or self._parse_expression()
3378
3379    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3380        return self._parse_set_operations(
3381            self._parse_select(nested=True, parse_subquery_alias=False)
3382        )
3383
3384    def _parse_transaction(self) -> exp.Expression:
3385        this = None
3386        if self._match_texts(self.TRANSACTION_KIND):
3387            this = self._prev.text
3388
3389        self._match_texts({"TRANSACTION", "WORK"})
3390
3391        modes = []
3392        while True:
3393            mode = []
3394            while self._match(TokenType.VAR):
3395                mode.append(self._prev.text)
3396
3397            if mode:
3398                modes.append(" ".join(mode))
3399            if not self._match(TokenType.COMMA):
3400                break
3401
3402        return self.expression(exp.Transaction, this=this, modes=modes)
3403
3404    def _parse_commit_or_rollback(self) -> exp.Expression:
3405        chain = None
3406        savepoint = None
3407        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3408
3409        self._match_texts({"TRANSACTION", "WORK"})
3410
3411        if self._match_text_seq("TO"):
3412            self._match_text_seq("SAVEPOINT")
3413            savepoint = self._parse_id_var()
3414
3415        if self._match(TokenType.AND):
3416            chain = not self._match_text_seq("NO")
3417            self._match_text_seq("CHAIN")
3418
3419        if is_rollback:
3420            return self.expression(exp.Rollback, savepoint=savepoint)
3421        return self.expression(exp.Commit, chain=chain)
3422
3423    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3424        if not self._match_text_seq("ADD"):
3425            return None
3426
3427        self._match(TokenType.COLUMN)
3428        exists_column = self._parse_exists(not_=True)
3429        expression = self._parse_column_def(self._parse_field(any_token=True))
3430
3431        if expression:
3432            expression.set("exists", exists_column)
3433
3434        return expression
3435
3436    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3437        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3438
3439    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3440    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3441        return self.expression(
3442            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3443        )
3444
3445    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3446        this = None
3447        kind = self._prev.token_type
3448
3449        if kind == TokenType.CONSTRAINT:
3450            this = self._parse_id_var()
3451
3452            if self._match(TokenType.CHECK):
3453                expression = self._parse_wrapped(self._parse_conjunction)
3454                enforced = self._match_text_seq("ENFORCED")
3455
3456                return self.expression(
3457                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3458                )
3459
3460        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3461            expression = self._parse_foreign_key()
3462        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3463            expression = self._parse_primary_key()
3464
3465        return self.expression(exp.AddConstraint, this=this, expression=expression)
3466
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement into an AlterTable expression.

        Supported actions: DELETE, ADD (constraints or columns), DROP
        (partitions or columns), RENAME TO, and ALTER COLUMN. Any ALTER of
        something other than TABLE falls back to an opaque Command node.
        """
        if not self._match(TokenType.TABLE):
            # e.g. ALTER VIEW / ALTER INDEX -- not modeled, keep the raw SQL
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # remember the position: ambiguous ADD/DROP forms are re-parsed from here
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # not a constraint -- rewind and parse as column additions
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # not a partition drop -- rewind and parse as column drops
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3519
3520    def _parse_show(self) -> t.Optional[exp.Expression]:
3521        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3522        if parser:
3523            return parser(self)
3524        self._advance()
3525        return self.expression(exp.Show, this=self._prev.text.upper())
3526
3527    def _default_parse_set_item(self) -> exp.Expression:
3528        return self.expression(
3529            exp.SetItem,
3530            this=self._parse_statement(),
3531        )
3532
3533    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3534        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3535        return parser(self) if parser else self._default_parse_set_item()
3536
3537    def _parse_merge(self) -> exp.Expression:
3538        self._match(TokenType.INTO)
3539        target = self._parse_table()
3540
3541        self._match(TokenType.USING)
3542        using = self._parse_table()
3543
3544        self._match(TokenType.ON)
3545        on = self._parse_conjunction()
3546
3547        whens = []
3548        while self._match(TokenType.WHEN):
3549            this = self._parse_conjunction()
3550            self._match(TokenType.THEN)
3551
3552            if self._match(TokenType.INSERT):
3553                _this = self._parse_star()
3554                if _this:
3555                    then = self.expression(exp.Insert, this=_this)
3556                else:
3557                    then = self.expression(
3558                        exp.Insert,
3559                        this=self._parse_value(),
3560                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3561                    )
3562            elif self._match(TokenType.UPDATE):
3563                expressions = self._parse_star()
3564                if expressions:
3565                    then = self.expression(exp.Update, expressions=expressions)
3566                else:
3567                    then = self.expression(
3568                        exp.Update,
3569                        expressions=self._match(TokenType.SET)
3570                        and self._parse_csv(self._parse_equality),
3571                    )
3572            elif self._match(TokenType.DELETE):
3573                then = self.expression(exp.Var, this=self._prev.text)
3574
3575            whens.append(self.expression(exp.When, this=this, then=then))
3576
3577        return self.expression(
3578            exp.Merge,
3579            this=target,
3580            using=using,
3581            on=on,
3582            expressions=whens,
3583        )
3584
3585    def _parse_set(self) -> exp.Expression:
3586        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3587
3588    def _parse_as_command(self, start: Token) -> exp.Command:
3589        while self._curr:
3590            self._advance()
3591        return exp.Command(this=self._find_sql(start, self._prev))
3592
3593    def _find_parser(
3594        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3595    ) -> t.Optional[t.Callable]:
3596        index = self._index
3597        this = []
3598        while True:
3599            # The current token might be multiple words
3600            curr = self._curr.text.upper()
3601            key = curr.split(" ")
3602            this.append(curr)
3603            self._advance()
3604            result, trie = in_trie(trie, key)
3605            if result == 0:
3606                break
3607            if result == 2:
3608                subparser = parsers[" ".join(this)]
3609                return subparser
3610        self._retreat(index)
3611        return None
3612
3613    def _match(self, token_type, advance=True):
3614        if not self._curr:
3615            return None
3616
3617        if self._curr.token_type == token_type:
3618            if advance:
3619                self._advance()
3620            return True
3621
3622        return None
3623
3624    def _match_set(self, types):
3625        if not self._curr:
3626            return None
3627
3628        if self._curr.token_type in types:
3629            self._advance()
3630            return True
3631
3632        return None
3633
3634    def _match_pair(self, token_type_a, token_type_b, advance=True):
3635        if not self._curr or not self._next:
3636            return None
3637
3638        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3639            if advance:
3640                self._advance(2)
3641            return True
3642
3643        return None
3644
3645    def _match_l_paren(self, expression=None):
3646        if not self._match(TokenType.L_PAREN):
3647            self.raise_error("Expecting (")
3648        if expression and self._prev_comments:
3649            expression.comments = self._prev_comments
3650
3651    def _match_r_paren(self, expression=None):
3652        if not self._match(TokenType.R_PAREN):
3653            self.raise_error("Expecting )")
3654        if expression and self._prev_comments:
3655            expression.comments = self._prev_comments
3656
3657    def _match_texts(self, texts):
3658        if self._curr and self._curr.text.upper() in texts:
3659            self._advance()
3660            return True
3661        return False
3662
3663    def _match_text_seq(self, *texts, advance=True):
3664        index = self._index
3665        for text in texts:
3666            if self._curr and self._curr.text.upper() == text:
3667                self._advance()
3668            else:
3669                self._retreat(index)
3670                return False
3671
3672        if not advance:
3673            self._retreat(index)
3674
3675        return True
3676
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where a dotted path (e.g. `a.b`) should be treated as a variable
        reference rather than a table-qualified column. Returns the (possibly
        replaced) node; children are rewritten in place via replace_children.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # a qualified column becomes table.name as a Dot; a bare column
            # becomes a plain Var
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
3691
3692    def _replace_lambda(self, node, lambda_variables):
3693        if isinstance(node, exp.Column):
3694            if node.name in lambda_variables:
3695                return node.this
3696        return node
def parse_var_map(args):
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser:
  42class Parser(metaclass=_Parser):
  43    """
  44    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  45    a parsed syntax tree.
  46
  47    Args:
  48        error_level: the desired error level.
  49            Default: ErrorLevel.RAISE
  50        error_message_context: determines the amount of context to capture from a
  51            query string when displaying the error message (in number of characters).
  52            Default: 50.
  53        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  54            Default: 0
  55        alias_post_tablesample: If the table alias comes after tablesample.
  56            Default: False
  57        max_errors: Maximum number of error messages to include in a raised ParseError.
  58            This is only relevant if error_level is ErrorLevel.RAISE.
  59            Default: 3
  60        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  61            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  62            Default: "nulls_are_small"
  63    """
  64
  65    FUNCTIONS: t.Dict[str, t.Callable] = {
  66        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  67        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  68            this=seq_get(args, 0),
  69            to=exp.DataType(this=exp.DataType.Type.TEXT),
  70        ),
  71        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  72            this=seq_get(args, 0),
  73            to=exp.DataType(this=exp.DataType.Type.TEXT),
  74        ),
  75        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  76            this=exp.Cast(
  77                this=seq_get(args, 0),
  78                to=exp.DataType(this=exp.DataType.Type.TEXT),
  79            ),
  80            start=exp.Literal.number(1),
  81            length=exp.Literal.number(10),
  82        ),
  83        "VAR_MAP": parse_var_map,
  84        "IFNULL": exp.Coalesce.from_arg_list,
  85    }
  86
  87    NO_PAREN_FUNCTIONS = {
  88        TokenType.CURRENT_DATE: exp.CurrentDate,
  89        TokenType.CURRENT_DATETIME: exp.CurrentDate,
  90        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
  91    }
  92
  93    NESTED_TYPE_TOKENS = {
  94        TokenType.ARRAY,
  95        TokenType.MAP,
  96        TokenType.STRUCT,
  97        TokenType.NULLABLE,
  98    }
  99
 100    TYPE_TOKENS = {
 101        TokenType.BOOLEAN,
 102        TokenType.TINYINT,
 103        TokenType.SMALLINT,
 104        TokenType.INT,
 105        TokenType.BIGINT,
 106        TokenType.FLOAT,
 107        TokenType.DOUBLE,
 108        TokenType.CHAR,
 109        TokenType.NCHAR,
 110        TokenType.VARCHAR,
 111        TokenType.NVARCHAR,
 112        TokenType.TEXT,
 113        TokenType.MEDIUMTEXT,
 114        TokenType.LONGTEXT,
 115        TokenType.MEDIUMBLOB,
 116        TokenType.LONGBLOB,
 117        TokenType.BINARY,
 118        TokenType.VARBINARY,
 119        TokenType.JSON,
 120        TokenType.JSONB,
 121        TokenType.INTERVAL,
 122        TokenType.TIME,
 123        TokenType.TIMESTAMP,
 124        TokenType.TIMESTAMPTZ,
 125        TokenType.TIMESTAMPLTZ,
 126        TokenType.DATETIME,
 127        TokenType.DATE,
 128        TokenType.DECIMAL,
 129        TokenType.UUID,
 130        TokenType.GEOGRAPHY,
 131        TokenType.GEOMETRY,
 132        TokenType.HLLSKETCH,
 133        TokenType.HSTORE,
 134        TokenType.PSEUDO_TYPE,
 135        TokenType.SUPER,
 136        TokenType.SERIAL,
 137        TokenType.SMALLSERIAL,
 138        TokenType.BIGSERIAL,
 139        TokenType.XML,
 140        TokenType.UNIQUEIDENTIFIER,
 141        TokenType.MONEY,
 142        TokenType.SMALLMONEY,
 143        TokenType.ROWVERSION,
 144        TokenType.IMAGE,
 145        TokenType.VARIANT,
 146        TokenType.OBJECT,
 147        *NESTED_TYPE_TOKENS,
 148    }
 149
 150    SUBQUERY_PREDICATES = {
 151        TokenType.ANY: exp.Any,
 152        TokenType.ALL: exp.All,
 153        TokenType.EXISTS: exp.Exists,
 154        TokenType.SOME: exp.Any,
 155    }
 156
 157    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 158
 159    ID_VAR_TOKENS = {
 160        TokenType.VAR,
 161        TokenType.ALWAYS,
 162        TokenType.ANTI,
 163        TokenType.APPLY,
 164        TokenType.AUTO_INCREMENT,
 165        TokenType.BEGIN,
 166        TokenType.BOTH,
 167        TokenType.BUCKET,
 168        TokenType.CACHE,
 169        TokenType.CASCADE,
 170        TokenType.COLLATE,
 171        TokenType.COLUMN,
 172        TokenType.COMMAND,
 173        TokenType.COMMIT,
 174        TokenType.COMPOUND,
 175        TokenType.CONSTRAINT,
 176        TokenType.CURRENT_TIME,
 177        TokenType.DEFAULT,
 178        TokenType.DELETE,
 179        TokenType.DESCRIBE,
 180        TokenType.DIV,
 181        TokenType.END,
 182        TokenType.EXECUTE,
 183        TokenType.ESCAPE,
 184        TokenType.FALSE,
 185        TokenType.FIRST,
 186        TokenType.FILTER,
 187        TokenType.FOLLOWING,
 188        TokenType.FORMAT,
 189        TokenType.FUNCTION,
 190        TokenType.GENERATED,
 191        TokenType.IDENTITY,
 192        TokenType.IF,
 193        TokenType.INDEX,
 194        TokenType.ISNULL,
 195        TokenType.INTERVAL,
 196        TokenType.LAZY,
 197        TokenType.LEADING,
 198        TokenType.LEFT,
 199        TokenType.LOCAL,
 200        TokenType.MATERIALIZED,
 201        TokenType.MERGE,
 202        TokenType.NATURAL,
 203        TokenType.NEXT,
 204        TokenType.OFFSET,
 205        TokenType.ONLY,
 206        TokenType.OPTIONS,
 207        TokenType.ORDINALITY,
 208        TokenType.PERCENT,
 209        TokenType.PIVOT,
 210        TokenType.PRECEDING,
 211        TokenType.RANGE,
 212        TokenType.REFERENCES,
 213        TokenType.RIGHT,
 214        TokenType.ROW,
 215        TokenType.ROWS,
 216        TokenType.SCHEMA,
 217        TokenType.SCHEMA_COMMENT,
 218        TokenType.SEED,
 219        TokenType.SEMI,
 220        TokenType.SET,
 221        TokenType.SHOW,
 222        TokenType.SORTKEY,
 223        TokenType.TABLE,
 224        TokenType.TEMPORARY,
 225        TokenType.TOP,
 226        TokenType.TRAILING,
 227        TokenType.TRUE,
 228        TokenType.UNBOUNDED,
 229        TokenType.UNIQUE,
 230        TokenType.UNLOGGED,
 231        TokenType.UNPIVOT,
 232        TokenType.PROCEDURE,
 233        TokenType.VIEW,
 234        TokenType.VOLATILE,
 235        TokenType.WINDOW,
 236        *SUBQUERY_PREDICATES,
 237        *TYPE_TOKENS,
 238        *NO_PAREN_FUNCTIONS,
 239    }
 240
 241    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 242        TokenType.APPLY,
 243        TokenType.LEFT,
 244        TokenType.NATURAL,
 245        TokenType.OFFSET,
 246        TokenType.RIGHT,
 247        TokenType.WINDOW,
 248    }
 249
 250    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 251
 252    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 253
 254    FUNC_TOKENS = {
 255        TokenType.COMMAND,
 256        TokenType.CURRENT_DATE,
 257        TokenType.CURRENT_DATETIME,
 258        TokenType.CURRENT_TIMESTAMP,
 259        TokenType.CURRENT_TIME,
 260        TokenType.FILTER,
 261        TokenType.FIRST,
 262        TokenType.FORMAT,
 263        TokenType.IDENTIFIER,
 264        TokenType.INDEX,
 265        TokenType.ISNULL,
 266        TokenType.ILIKE,
 267        TokenType.LIKE,
 268        TokenType.MERGE,
 269        TokenType.OFFSET,
 270        TokenType.PRIMARY_KEY,
 271        TokenType.REPLACE,
 272        TokenType.ROW,
 273        TokenType.UNNEST,
 274        TokenType.VAR,
 275        TokenType.LEFT,
 276        TokenType.RIGHT,
 277        TokenType.DATE,
 278        TokenType.DATETIME,
 279        TokenType.TABLE,
 280        TokenType.TIMESTAMP,
 281        TokenType.TIMESTAMPTZ,
 282        TokenType.WINDOW,
 283        *TYPE_TOKENS,
 284        *SUBQUERY_PREDICATES,
 285    }
 286
    # Token -> expression class for boolean conjunctions.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    # Token -> expression class for equality-level operators.
    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    # Token -> expression class for ordering comparisons.
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # Token -> expression class for bitwise-precedence operators.
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # Token -> expression class for additive-precedence ("term") operators.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Token -> expression class for multiplicative-precedence ("factor") operators.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Tokens denoting time/timestamp data types.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that begin a set operation between queries.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN side qualifiers.
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # JOIN kind qualifiers.
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }
 352
    # Parsers for lambda-like constructs: "args -> expr" lambdas and
    # "name => value" keyword arguments.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that combine with a column (casts, JSON extraction arrows, ...).
    # A None value marks a token the caller must handle specially instead of dispatching.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 401
    # Expression class -> parser entry point; used by parse_into to parse a token
    # stream directly into a specific node type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> parser for that statement kind.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 453
    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token parsers; each receives the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder parsers (?, @var, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self.expression(
            exp.Parameter, this=self._parse_var() or self._parse_primary()
        ),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operator parsers (BETWEEN, IN, IS, LIKE family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }
 516
    # Upper-cased keyword -> parser for DDL properties. Several entries inspect
    # self._prev because modifier words (NO / DUAL / DEFAULT) are consumed before
    # dispatch (see _parse_property_before).
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
    }
 581
    # Token -> parser for column/table constraints.
    CONSTRAINT_PARSERS = {
        TokenType.CHECK: lambda self: self.expression(
            exp.Check, this=self._parse_wrapped(self._parse_conjunction)
        ),
        TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(),
        TokenType.UNIQUE: lambda self: self._parse_unique(),
        TokenType.LIKE: lambda self: self._parse_create_like(),
    }

    # Function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
    }

    # Function name -> dedicated parser for functions with special argument syntax.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Query-modifier arg name -> parser; keys are the arg names set on query nodes.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Empty by default; subclasses populate these and the metaclass builds
    # lookup tries from their keys (see _Parser.__new__).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}
 626
    # Node types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted by CREATE / DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Accepted transaction modes for BEGIN.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Tokens usable as a window alias; ROWS is excluded to avoid ambiguity with frames.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that may follow ALTER TABLE ... ADD.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST raises on failure (dialects override for TRY_CAST semantics).
    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 666
 667    def __init__(
 668        self,
 669        error_level: t.Optional[ErrorLevel] = None,
 670        error_message_context: int = 100,
 671        index_offset: int = 0,
 672        unnest_column_only: bool = False,
 673        alias_post_tablesample: bool = False,
 674        max_errors: int = 3,
 675        null_ordering: t.Optional[str] = None,
 676    ):
 677        self.error_level = error_level or ErrorLevel.IMMEDIATE
 678        self.error_message_context = error_message_context
 679        self.index_offset = index_offset
 680        self.unnest_column_only = unnest_column_only
 681        self.alias_post_tablesample = alias_post_tablesample
 682        self.max_errors = max_errors
 683        self.null_ordering = null_ordering
 684        self.reset()
 685
 686    def reset(self):
 687        self.sql = ""
 688        self.errors = []
 689        self._tokens = []
 690        self._index = 0
 691        self._curr = None
 692        self._next = None
 693        self._prev = None
 694        self._prev_comments = None
 695
 696    def parse(
 697        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 698    ) -> t.List[t.Optional[exp.Expression]]:
 699        """
 700        Parses a list of tokens and returns a list of syntax trees, one tree
 701        per parsed SQL statement.
 702
 703        Args:
 704            raw_tokens: the list of tokens.
 705            sql: the original SQL string, used to produce helpful debug messages.
 706
 707        Returns:
 708            The list of syntax trees.
 709        """
 710        return self._parse(
 711            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 712        )
 713
 714    def parse_into(
 715        self,
 716        expression_types: exp.IntoType,
 717        raw_tokens: t.List[Token],
 718        sql: t.Optional[str] = None,
 719    ) -> t.List[t.Optional[exp.Expression]]:
 720        """
 721        Parses a list of tokens into a given Expression type. If a collection of Expression
 722        types is given instead, this method will try to parse the token list into each one
 723        of them, stopping at the first for which the parsing succeeds.
 724
 725        Args:
 726            expression_types: the expression type(s) to try and parse the token list into.
 727            raw_tokens: the list of tokens.
 728            sql: the original SQL string, used to produce helpful debug messages.
 729
 730        Returns:
 731            The target Expression.
 732        """
 733        errors = []
 734        for expression_type in ensure_collection(expression_types):
 735            parser = self.EXPRESSION_PARSERS.get(expression_type)
 736            if not parser:
 737                raise TypeError(f"No parser registered for {expression_type}")
 738            try:
 739                return self._parse(parser, raw_tokens, sql)
 740            except ParseError as e:
 741                e.errors[0]["into_expression"] = expression_type
 742                errors.append(e)
 743        raise ParseError(
 744            f"Failed to parse into {expression_types}",
 745            errors=merge_errors(errors),
 746        ) from errors[-1]
 747
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Drives the parse: splits the token stream on semicolons and applies
        `parse_method` to each statement's tokens, returning one tree per chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split tokens into per-statement chunks on semicolons; a trailing
        # semicolon does not open a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start before the first token; _advance() positions _curr on it.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Tokens left unconsumed mean the statement did not parse completely.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 781
 782    def check_errors(self) -> None:
 783        """
 784        Logs or raises any found errors, depending on the chosen error level setting.
 785        """
 786        if self.error_level == ErrorLevel.WARN:
 787            for error in self.errors:
 788                logger.error(str(error))
 789        elif self.error_level == ErrorLevel.RAISE and self.errors:
 790            raise ParseError(
 791                concat_messages(self.errors, self.max_errors),
 792                errors=merge_errors(self.errors),
 793            )
 794
 795    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 796        """
 797        Appends an error in the list of recorded errors or raises it, depending on the chosen
 798        error level setting.
 799        """
 800        token = token or self._curr or self._prev or Token.string("")
 801        start = self._find_token(token)
 802        end = start + len(token.text)
 803        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 804        highlight = self.sql[start:end]
 805        end_context = self.sql[end : end + self.error_message_context]
 806
 807        error = ParseError.new(
 808            f"{message}. Line {token.line}, Col: {token.col}.\n"
 809            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 810            description=message,
 811            line=token.line,
 812            col=token.col,
 813            start_context=start_context,
 814            highlight=highlight,
 815            end_context=end_context,
 816        )
 817
 818        if self.error_level == ErrorLevel.IMMEDIATE:
 819            raise error
 820
 821        self.errors.append(error)
 822
 823    def expression(
 824        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 825    ) -> exp.Expression:
 826        """
 827        Creates a new, validated Expression.
 828
 829        Args:
 830            exp_class: the expression class to instantiate.
 831            comments: an optional list of comments to attach to the expression.
 832            kwargs: the arguments to set for the expression along with their respective values.
 833
 834        Returns:
 835            The target expression.
 836        """
 837        instance = exp_class(**kwargs)
 838        if self._prev_comments:
 839            instance.comments = self._prev_comments
 840            self._prev_comments = None
 841        if comments:
 842            instance.comments = comments
 843        self.validate_expression(instance)
 844        return instance
 845
 846    def validate_expression(
 847        self, expression: exp.Expression, args: t.Optional[t.List] = None
 848    ) -> None:
 849        """
 850        Validates an already instantiated expression, making sure that all its mandatory arguments
 851        are set.
 852
 853        Args:
 854            expression: the expression to validate.
 855            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 856        """
 857        if self.error_level == ErrorLevel.IGNORE:
 858            return
 859
 860        for error_message in expression.error_messages(args):
 861            self.raise_error(error_message)
 862
 863    def _find_sql(self, start: Token, end: Token) -> str:
 864        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 865
 866    def _find_token(self, token: Token) -> int:
 867        line = 1
 868        col = 1
 869        index = 0
 870
 871        while line < token.line or col < token.col:
 872            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 873                line += 1
 874                col = 1
 875            else:
 876                col += 1
 877            index += 1
 878
 879        return index
 880
 881    def _advance(self, times: int = 1) -> None:
 882        self._index += times
 883        self._curr = seq_get(self._tokens, self._index)
 884        self._next = seq_get(self._tokens, self._index + 1)
 885        if self._index > 0:
 886            self._prev = self._tokens[self._index - 1]
 887            self._prev_comments = self._prev.comments
 888        else:
 889            self._prev = None
 890            self._prev_comments = None
 891
    def _retreat(self, index: int) -> None:
        # Rewind to `index` by advancing a (typically negative) delta.
        self._advance(index - self._index)
 894
 895    def _parse_command(self) -> exp.Expression:
 896        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 897
 898    def _parse_statement(self) -> t.Optional[exp.Expression]:
 899        if self._curr is None:
 900            return None
 901
 902        if self._match_set(self.STATEMENT_PARSERS):
 903            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 904
 905        if self._match_set(Tokenizer.COMMANDS):
 906            return self._parse_command()
 907
 908        expression = self._parse_expression()
 909        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 910
 911        self._parse_query_modifiers(expression)
 912        return expression
 913
 914    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
 915        start = self._prev
 916        temporary = self._match(TokenType.TEMPORARY)
 917        materialized = self._match(TokenType.MATERIALIZED)
 918        kind = self._match_set(self.CREATABLES) and self._prev.text
 919        if not kind:
 920            if default_kind:
 921                kind = default_kind
 922            else:
 923                return self._parse_as_command(start)
 924
 925        return self.expression(
 926            exp.Drop,
 927            exists=self._parse_exists(),
 928            this=self._parse_table(schema=True),
 929            kind=kind,
 930            temporary=temporary,
 931            materialized=materialized,
 932            cascade=self._match(TokenType.CASCADE),
 933        )
 934
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS. The chain short-circuits, so later tokens are
        # only consumed when the earlier keywords matched; note the falsy result
        # may be whatever falsy value _match returns rather than False.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
 941
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parses a CREATE statement (tables, views, schemas, functions, procedures,
        indexes), including several dialect-specific modifiers. Falls back to a raw
        command when the created object kind cannot be determined.
        """
        start = self._prev
        # Optional modifiers must be consumed in this exact order; each _match
        # only consumes its token when present.
        replace = self._match_pair(TokenType.OR, TokenType.REPLACE)
        set_ = self._match(TokenType.SET)  # Teradata
        multiset = self._match_text_seq("MULTISET")  # Teradata
        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
        volatile = self._match(TokenType.VOLATILE)  # Teradata
        temporary = self._match(TokenType.TEMPORARY)
        transient = self._match_text_seq("TRANSIENT")
        external = self._match_text_seq("EXTERNAL")
        unique = self._match(TokenType.UNIQUE)
        materialized = self._match(TokenType.MATERIALIZED)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # Properties may precede the object kind; retry after parsing them.
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        data = None
        statistics = None
        no_primary_index = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (
            TokenType.TABLE,
            TokenType.VIEW,
            TokenType.SCHEMA,
        ):
            table_parts = self._parse_table_parts(schema=True)

            if self._match(TokenType.COMMA):  # comma-separated properties before schema definition
                properties = self._parse_properties(before=True)

            this = self._parse_schema(this=table_parts)

            if not properties:  # properties after schema definition
                properties = self._parse_properties()

            self._match(TokenType.ALIAS)
            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # WITH [NO] DATA / AND [NO] STATISTICS modifiers.
                if self._match_text_seq("WITH", "DATA"):
                    data = True
                elif self._match_text_seq("WITH", "NO", "DATA"):
                    data = False

                if self._match_text_seq("AND", "STATISTICS"):
                    statistics = True
                elif self._match_text_seq("AND", "NO", "STATISTICS"):
                    statistics = False

                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")

                # Collect trailing index definitions until none remain.
                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # post index PARTITION BY property
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        if properties:
                            properties.expressions.append(self._parse_property())
                        else:
                            properties = self._parse_properties()

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            set=set_,
            multiset=multiset,
            global_temporary=global_temporary,
            volatile=volatile,
            exists=exists,
            properties=properties,
            temporary=temporary,
            transient=transient,
            external=external,
            replace=replace,
            unique=unique,
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )
1065
1066    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1067        self._match(TokenType.COMMA)
1068
1069        # parsers look to _prev for no/dual/default, so need to consume first
1070        self._match_text_seq("NO")
1071        self._match_text_seq("DUAL")
1072        self._match_text_seq("DEFAULT")
1073
1074        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1075            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1076
1077        return None
1078
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property, returning None when the upcoming
        tokens don't form one. Each branch only consumes tokens on a match."""
        # Keyword-driven properties: dispatch on the matched keyword's text.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; peek only (advance=False) so nothing
        # is consumed unless both the key and the '=' are actually present.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1102
1103    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1104        self._match(TokenType.EQ)
1105        self._match(TokenType.ALIAS)
1106        return self.expression(
1107            exp_class,
1108            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1109        )
1110
1111    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1112        properties = []
1113
1114        while True:
1115            if before:
1116                identified_property = self._parse_property_before()
1117            else:
1118                identified_property = self._parse_property()
1119
1120            if not identified_property:
1121                break
1122            for p in ensure_collection(identified_property):
1123                properties.append(p)
1124
1125        if properties:
1126            return self.expression(exp.Properties, expressions=properties)
1127
1128        return None
1129
1130    def _parse_fallback(self, no=False) -> exp.Expression:
1131        self._match_text_seq("FALLBACK")
1132        return self.expression(
1133            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1134        )
1135
1136    def _parse_with_property(
1137        self,
1138    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1139        if self._match(TokenType.L_PAREN, advance=False):
1140            return self._parse_wrapped_csv(self._parse_property)
1141
1142        if not self._next:
1143            return None
1144
1145        if self._next.text.upper() == "JOURNAL":
1146            return self._parse_withjournaltable()
1147
1148        return self._parse_withisolatedloading()
1149
1150    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse a DEFINER = user@host clause into a DefinerProperty.

        Returns None when either the user or the host part is missing.
        """
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host is either an identifier or a bare '%' (MOD) wildcard token.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
1162
1163    def _parse_withjournaltable(self) -> exp.Expression:
1164        self._match_text_seq("WITH", "JOURNAL", "TABLE")
1165        self._match(TokenType.EQ)
1166        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1167
1168    def _parse_log(self, no=False) -> exp.Expression:
1169        self._match_text_seq("LOG")
1170        return self.expression(exp.LogProperty, no=no)
1171
1172    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1173        before = self._match_text_seq("BEFORE")
1174        self._match_text_seq("JOURNAL")
1175        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1176
1177    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1178        self._match_text_seq("NOT")
1179        self._match_text_seq("LOCAL")
1180        self._match_text_seq("AFTER", "JOURNAL")
1181        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1182
1183    def _parse_checksum(self) -> exp.Expression:
1184        self._match_text_seq("CHECKSUM")
1185        self._match(TokenType.EQ)
1186
1187        on = None
1188        if self._match(TokenType.ON):
1189            on = True
1190        elif self._match_text_seq("OFF"):
1191            on = False
1192        default = self._match(TokenType.DEFAULT)
1193
1194        return self.expression(
1195            exp.ChecksumProperty,
1196            on=on,
1197            default=default,
1198        )
1199
1200    def _parse_freespace(self) -> exp.Expression:
1201        self._match_text_seq("FREESPACE")
1202        self._match(TokenType.EQ)
1203        return self.expression(
1204            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1205        )
1206
1207    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1208        self._match_text_seq("MERGEBLOCKRATIO")
1209        if self._match(TokenType.EQ):
1210            return self.expression(
1211                exp.MergeBlockRatioProperty,
1212                this=self._parse_number(),
1213                percent=self._match(TokenType.PERCENT),
1214            )
1215        else:
1216            return self.expression(
1217                exp.MergeBlockRatioProperty,
1218                no=no,
1219                default=default,
1220            )
1221
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse the DATABLOCKSIZE property in its DEFAULT, MIN/MAX, or
        explicit-size (with optional unit keyword) forms."""
        if default:
            # `default` is decided by the caller; the DEFAULT token itself is
            # consumed before dispatch (see _parse_property_before).
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        # DATABLOCKSIZE [=] <number> [BYTES|KBYTES|KILOBYTES]
        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1240
1241    def _parse_blockcompression(self) -> exp.Expression:
1242        self._match_text_seq("BLOCKCOMPRESSION")
1243        self._match(TokenType.EQ)
1244        always = self._match(TokenType.ALWAYS)
1245        manual = self._match_text_seq("MANUAL")
1246        never = self._match_text_seq("NEVER")
1247        default = self._match_text_seq("DEFAULT")
1248        autotemp = None
1249        if self._match_text_seq("AUTOTEMP"):
1250            autotemp = self._parse_schema()
1251
1252        return self.expression(
1253            exp.BlockCompressionProperty,
1254            always=always,
1255            manual=manual,
1256            never=never,
1257            default=default,
1258            autotemp=autotemp,
1259        )
1260
1261    def _parse_withisolatedloading(self) -> exp.Expression:
1262        self._match(TokenType.WITH)
1263        no = self._match_text_seq("NO")
1264        concurrent = self._match_text_seq("CONCURRENT")
1265        self._match_text_seq("ISOLATED", "LOADING")
1266        for_all = self._match_text_seq("FOR", "ALL")
1267        for_insert = self._match_text_seq("FOR", "INSERT")
1268        for_none = self._match_text_seq("FOR", "NONE")
1269        return self.expression(
1270            exp.IsolatedLoadingProperty,
1271            no=no,
1272            concurrent=concurrent,
1273            for_all=for_all,
1274            for_insert=for_insert,
1275            for_none=for_none,
1276        )
1277
1278    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1279        if self._match(TokenType.PARTITION_BY):
1280            return self._parse_csv(self._parse_conjunction)
1281        return []
1282
1283    def _parse_partitioned_by(self) -> exp.Expression:
1284        self._match(TokenType.EQ)
1285        return self.expression(
1286            exp.PartitionedByProperty,
1287            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1288        )
1289
1290    def _parse_distkey(self) -> exp.Expression:
1291        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1292
1293    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1294        table = self._parse_table(schema=True)
1295        options = []
1296        while self._match_texts(("INCLUDING", "EXCLUDING")):
1297            this = self._prev.text.upper()
1298            id_var = self._parse_id_var()
1299
1300            if not id_var:
1301                return None
1302
1303            options.append(
1304                self.expression(
1305                    exp.Property,
1306                    this=this,
1307                    value=exp.Var(this=id_var.this.upper()),
1308                )
1309            )
1310        return self.expression(exp.LikeProperty, this=table, expressions=options)
1311
1312    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1313        return self.expression(
1314            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1315        )
1316
1317    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1318        self._match(TokenType.EQ)
1319        return self.expression(
1320            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1321        )
1322
    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: a scalar type, or a table shape written as
        either TABLE<col type, ...> or TABLE (col type, ...)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracket struct form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # Parenthesized form: RETURNS TABLE (col type, ...)
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1342
1343    def _parse_describe(self) -> exp.Expression:
1344        kind = self._match_set(self.CREATABLES) and self._prev.text
1345        this = self._parse_table()
1346
1347        return self.expression(exp.Describe, this=this, kind=kind)
1348
1349    def _parse_insert(self) -> exp.Expression:
1350        overwrite = self._match(TokenType.OVERWRITE)
1351        local = self._match(TokenType.LOCAL)
1352
1353        this: t.Optional[exp.Expression]
1354
1355        if self._match_text_seq("DIRECTORY"):
1356            this = self.expression(
1357                exp.Directory,
1358                this=self._parse_var_or_string(),
1359                local=local,
1360                row_format=self._parse_row_format(match_row=True),
1361            )
1362        else:
1363            self._match(TokenType.INTO)
1364            self._match(TokenType.TABLE)
1365            this = self._parse_table(schema=True)
1366
1367        return self.expression(
1368            exp.Insert,
1369            this=this,
1370            exists=self._parse_exists(),
1371            partition=self._parse_partition(),
1372            expression=self._parse_ddl_select(),
1373            overwrite=overwrite,
1374        )
1375
1376    def _parse_row(self) -> t.Optional[exp.Expression]:
1377        if not self._match(TokenType.FORMAT):
1378            return None
1379        return self._parse_row_format()
1380
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT SERDE/DELIMITED clause.

        When match_row is True the leading ROW FORMAT token pair is required
        and None is returned if it's absent; otherwise parsing starts directly
        at SERDE/DELIMITED.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each optional DELIMITED sub-clause contributes one keyword argument.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1406
    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...

        The trailing keyword arguments below are evaluated left to right,
        which matches the clause order in the statement text.
        """
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1424
1425    def _parse_delete(self) -> exp.Expression:
1426        self._match(TokenType.FROM)
1427
1428        return self.expression(
1429            exp.Delete,
1430            this=self._parse_table(schema=True),
1431            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1432            where=self._parse_where(),
1433        )
1434
1435    def _parse_update(self) -> exp.Expression:
1436        return self.expression(
1437            exp.Update,
1438            **{  # type: ignore
1439                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1440                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1441                "from": self._parse_from(),
1442                "where": self._parse_where(),
1443            },
1444        )
1445
1446    def _parse_uncache(self) -> exp.Expression:
1447        if not self._match(TokenType.TABLE):
1448            self.raise_error("Expecting TABLE after UNCACHE")
1449
1450        return self.expression(
1451            exp.Uncache,
1452            exists=self._parse_exists(),
1453            this=self._parse_table(schema=True),
1454        )
1455
1456    def _parse_cache(self) -> exp.Expression:
1457        lazy = self._match(TokenType.LAZY)
1458        self._match(TokenType.TABLE)
1459        table = self._parse_table(schema=True)
1460        options = []
1461
1462        if self._match(TokenType.OPTIONS):
1463            self._match_l_paren()
1464            k = self._parse_string()
1465            self._match(TokenType.EQ)
1466            v = self._parse_string()
1467            options = [k, v]
1468            self._match_r_paren()
1469
1470        self._match(TokenType.ALIAS)
1471        return self.expression(
1472            exp.Cache,
1473            this=table,
1474            lazy=lazy,
1475            options=options,
1476            expression=self._parse_select(nested=True),
1477        )
1478
1479    def _parse_partition(self) -> t.Optional[exp.Expression]:
1480        if not self._match(TokenType.PARTITION):
1481            return None
1482
1483        return self.expression(
1484            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1485        )
1486
1487    def _parse_value(self) -> exp.Expression:
1488        if self._match(TokenType.L_PAREN):
1489            expressions = self._parse_csv(self._parse_conjunction)
1490            self._match_r_paren()
1491            return self.expression(exp.Tuple, expressions=expressions)
1492
1493        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1494        # Source: https://prestodb.io/docs/current/sql/values.html
1495        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1496
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized query, including any leading
        WITH clause, and fold trailing set operations (UNION etc.) on top.

        Args:
            nested: allow a parenthesized subquery at this position.
            table: allow a bare table operand instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # reached only when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            # Keep comments attached to the SELECT token for round-tripping.
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1570
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause into an exp.With, or return None.

        Args:
            skip_with_token: set when the caller has already consumed WITH.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a repeated WITH keyword
            # between them is also tolerated (and consumed).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1587
1588    def _parse_cte(self) -> exp.Expression:
1589        alias = self._parse_table_alias()
1590        if not alias or not alias.this:
1591            self.raise_error("Expected CTE to have alias")
1592
1593        self._match(TokenType.ALIAS)
1594
1595        return self.expression(
1596            exp.CTE,
1597            this=self._parse_wrapped(self._parse_statement),
1598            alias=alias,
1599        )
1600
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an optional table alias, possibly with a column alias list.

        Returns None when neither an alias name nor a column list is present.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        # Remember the position so we can back out of a '(' that turns out not
        # to introduce a column alias list.
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
            # Side-effecting expression: consume ')' on success, rewind otherwise.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
1620
1621    def _parse_subquery(
1622        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1623    ) -> exp.Expression:
1624        return self.expression(
1625            exp.Subquery,
1626            this=this,
1627            pivots=self._parse_pivots(),
1628            alias=self._parse_table_alias() if parse_alias else None,
1629        )
1630
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach laterals, joins, comma-joined tables, and clause-level
        modifiers (via QUERY_MODIFIER_PARSERS) to `this`, mutating it in place.

        No-op unless `this` is one of the MODIFIABLES expression types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Comma cross-joins don't apply when modifying a bare table.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # A comma appends another table to the existing FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1655
1656    def _parse_hint(self) -> t.Optional[exp.Expression]:
1657        if self._match(TokenType.HINT):
1658            hints = self._parse_csv(self._parse_function)
1659            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1660                self.raise_error("Expected */ after HINT")
1661            return self.expression(exp.Hint, expressions=hints)
1662
1663        return None
1664
1665    def _parse_into(self) -> t.Optional[exp.Expression]:
1666        if not self._match(TokenType.INTO):
1667            return None
1668
1669        temp = self._match(TokenType.TEMPORARY)
1670        unlogged = self._match(TokenType.UNLOGGED)
1671        self._match(TokenType.TABLE)
1672
1673        return self.expression(
1674            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1675        )
1676
1677    def _parse_from(self) -> t.Optional[exp.Expression]:
1678        if not self._match(TokenType.FROM):
1679            return None
1680
1681        return self.expression(
1682            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1683        )
1684
    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parse a MATCH_RECOGNIZE(...) clause, or return None when absent.

        The PATTERN body is captured verbatim (as an exp.Var) by scanning raw
        tokens until its parentheses balance out.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match mode, stored as a single Var of the literal keywords.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        # AFTER MATCH SKIP strategy, likewise kept as a literal Var.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens until the pattern's parens balance.
            paren = 1
            start = self._curr

            # NOTE(review): `end` is bound only inside this loop; if the body
            # never runs (and raise_error above didn't raise under a lenient
            # error level), the reference below would NameError — confirm.
            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )
1763
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse a LATERAL clause or an OUTER/CROSS APPLY, or return None.

        APPLY forms are wrapped in an exp.Join (LEFT side for OUTER APPLY).
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or name.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW aliasing: a table name, then optionally AS col1, col2, ...
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1808
1809    def _parse_join_side_and_kind(
1810        self,
1811    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1812        return (
1813            self._match(TokenType.NATURAL) and self._prev,
1814            self._match_set(self.JOIN_SIDES) and self._prev,
1815            self._match_set(self.JOIN_KINDS) and self._prev,
1816        )
1817
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, with its optional NATURAL/side/kind prefix and
        an ON or USING suffix, or return None when there is no join here.

        Args:
            skip_join_token: set when the caller already consumed JOIN.
        """
        # These prefix tokens are consumed even if the JOIN keyword then fails
        # to match below.
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
1841
1842    def _parse_index(self) -> exp.Expression:
1843        index = self._parse_id_var()
1844        self._match(TokenType.ON)
1845        self._match(TokenType.TABLE)  # hive
1846
1847        return self.expression(
1848            exp.Index,
1849            this=index,
1850            table=self.expression(exp.Table, this=self._parse_id_var()),
1851            columns=self._parse_expression(),
1852        )
1853
1854    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
1855        unique = self._match(TokenType.UNIQUE)
1856        primary = self._match_text_seq("PRIMARY")
1857        amp = self._match_text_seq("AMP")
1858        if not self._match(TokenType.INDEX):
1859            return None
1860        index = self._parse_id_var()
1861        columns = None
1862        if self._match(TokenType.L_PAREN, advance=False):
1863            columns = self._parse_wrapped_csv(self._parse_column)
1864        return self.expression(
1865            exp.Index,
1866            this=index,
1867            columns=columns,
1868            unique=unique,
1869            primary=primary,
1870            amp=amp,
1871        )
1872
1873    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
1874        catalog = None
1875        db = None
1876        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)
1877
1878        while self._match(TokenType.DOT):
1879            if catalog:
1880                # This allows nesting the table in arbitrarily many dot expressions if needed
1881                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
1882            else:
1883                catalog = db
1884                db = table
1885                table = self._parse_id_var()
1886
1887        if not table:
1888            self.raise_error(f"Expected table name but got {self._curr}")
1889
1890        return self.expression(
1891            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
1892        )
1893
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: LATERAL, UNNEST, VALUES, a subquery, or a name.

        Args:
            schema: when True, parse the named table as a schema (column defs).
            alias_tokens: token types allowed to start an alias; defaults to
                self.TABLE_ALIAS_TOKENS.

        Returns:
            The parsed table expression, or None when nothing matched.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put the alias after TABLESAMPLE, so sample first there.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # Table hints: WITH (<hint or function>, ...).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this
1945
1946    def _parse_unnest(self) -> t.Optional[exp.Expression]:
1947        if not self._match(TokenType.UNNEST):
1948            return None
1949
1950        expressions = self._parse_wrapped_csv(self._parse_column)
1951        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
1952        alias = self._parse_table_alias()
1953
1954        if alias and self.unnest_column_only:
1955            if alias.args.get("columns"):
1956                self.raise_error("Unexpected extra column alias in unnest.")
1957            alias.set("columns", [alias.this])
1958            alias.set("this", None)
1959
1960        offset = None
1961        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
1962            self._match(TokenType.ALIAS)
1963            offset = self._parse_conjunction()
1964
1965        return self.expression(
1966            exp.Unnest,
1967            expressions=expressions,
1968            ordinality=ordinality,
1969            alias=alias,
1970            offset=offset,
1971        )
1972
1973    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
1974        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
1975        if not is_derived and not self._match(TokenType.VALUES):
1976            return None
1977
1978        expressions = self._parse_csv(self._parse_value)
1979
1980        if is_derived:
1981            self._match_r_paren()
1982
1983        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
1984
1985    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
1986        if not self._match(TokenType.TABLE_SAMPLE):
1987            return None
1988
1989        method = self._parse_var()
1990        bucket_numerator = None
1991        bucket_denominator = None
1992        bucket_field = None
1993        percent = None
1994        rows = None
1995        size = None
1996        seed = None
1997
1998        self._match_l_paren()
1999
2000        if self._match(TokenType.BUCKET):
2001            bucket_numerator = self._parse_number()
2002            self._match(TokenType.OUT_OF)
2003            bucket_denominator = bucket_denominator = self._parse_number()
2004            self._match(TokenType.ON)
2005            bucket_field = self._parse_field()
2006        else:
2007            num = self._parse_number()
2008
2009            if self._match(TokenType.PERCENT):
2010                percent = num
2011            elif self._match(TokenType.ROWS):
2012                rows = num
2013            else:
2014                size = num
2015
2016        self._match_r_paren()
2017
2018        if self._match(TokenType.SEED):
2019            seed = self._parse_wrapped(self._parse_number)
2020
2021        return self.expression(
2022            exp.TableSample,
2023            method=method,
2024            bucket_numerator=bucket_numerator,
2025            bucket_denominator=bucket_denominator,
2026            bucket_field=bucket_field,
2027            percent=percent,
2028            rows=rows,
2029            size=size,
2030            seed=seed,
2031        )
2032
2033    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2034        return list(iter(self._parse_pivot, None))
2035
2036    def _parse_pivot(self) -> t.Optional[exp.Expression]:
2037        index = self._index
2038
2039        if self._match(TokenType.PIVOT):
2040            unpivot = False
2041        elif self._match(TokenType.UNPIVOT):
2042            unpivot = True
2043        else:
2044            return None
2045
2046        expressions = []
2047        field = None
2048
2049        if not self._match(TokenType.L_PAREN):
2050            self._retreat(index)
2051            return None
2052
2053        if unpivot:
2054            expressions = self._parse_csv(self._parse_column)
2055        else:
2056            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
2057
2058        if not self._match(TokenType.FOR):
2059            self.raise_error("Expecting FOR")
2060
2061        value = self._parse_column()
2062
2063        if not self._match(TokenType.IN):
2064            self.raise_error("Expecting IN")
2065
2066        field = self._parse_in(value)
2067
2068        self._match_r_paren()
2069
2070        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2071
2072    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2073        if not skip_where_token and not self._match(TokenType.WHERE):
2074            return None
2075
2076        return self.expression(
2077            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2078        )
2079
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY with optional GROUPING SETS, CUBE and ROLLUP."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        expressions = self._parse_csv(self._parse_conjunction)
        grouping_sets = self._parse_grouping_sets()

        self._match(TokenType.COMMA)
        with_ = self._match(TokenType.WITH)
        # WITH CUBE takes no column list (value is just truthy); CUBE (...) does.
        cube = self._match(TokenType.CUBE) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        self._match(TokenType.COMMA)
        # Same shape as CUBE: either WITH ROLLUP or ROLLUP (<columns>).
        rollup = self._match(TokenType.ROLLUP) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        return self.expression(
            exp.Group,
            expressions=expressions,
            grouping_sets=grouping_sets,
            cube=cube,
            rollup=rollup,
        )
2105
2106    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2107        if not self._match(TokenType.GROUPING_SETS):
2108            return None
2109
2110        return self._parse_wrapped_csv(self._parse_grouping_set)
2111
2112    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2113        if self._match(TokenType.L_PAREN):
2114            grouping_set = self._parse_csv(self._parse_column)
2115            self._match_r_paren()
2116            return self.expression(exp.Tuple, expressions=grouping_set)
2117
2118        return self._parse_column()
2119
2120    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2121        if not skip_having_token and not self._match(TokenType.HAVING):
2122            return None
2123        return self.expression(exp.Having, this=self._parse_conjunction())
2124
2125    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2126        if not self._match(TokenType.QUALIFY):
2127            return None
2128        return self.expression(exp.Qualify, this=self._parse_conjunction())
2129
2130    def _parse_order(
2131        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2132    ) -> t.Optional[exp.Expression]:
2133        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2134            return this
2135
2136        return self.expression(
2137            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2138        )
2139
2140    def _parse_sort(
2141        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2142    ) -> t.Optional[exp.Expression]:
2143        if not self._match(token_type):
2144            return None
2145        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2146
2147    def _parse_ordered(self) -> exp.Expression:
2148        this = self._parse_conjunction()
2149        self._match(TokenType.ASC)
2150        is_desc = self._match(TokenType.DESC)
2151        is_nulls_first = self._match(TokenType.NULLS_FIRST)
2152        is_nulls_last = self._match(TokenType.NULLS_LAST)
2153        desc = is_desc or False
2154        asc = not desc
2155        nulls_first = is_nulls_first or False
2156        explicitly_null_ordered = is_nulls_first or is_nulls_last
2157        if (
2158            not explicitly_null_ordered
2159            and (
2160                (asc and self.null_ordering == "nulls_are_small")
2161                or (desc and self.null_ordering != "nulls_are_small")
2162            )
2163            and self.null_ordering != "nulls_are_last"
2164        ):
2165            nulls_first = True
2166
2167        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2168
2169    def _parse_limit(
2170        self, this: t.Optional[exp.Expression] = None, top: bool = False
2171    ) -> t.Optional[exp.Expression]:
2172        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2173            limit_paren = self._match(TokenType.L_PAREN)
2174            limit_exp = self.expression(
2175                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2176            )
2177
2178            if limit_paren:
2179                self._match_r_paren()
2180
2181            return limit_exp
2182
2183        if self._match(TokenType.FETCH):
2184            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2185            direction = self._prev.text if direction else "FIRST"
2186            count = self._parse_number()
2187            self._match_set((TokenType.ROW, TokenType.ROWS))
2188            self._match(TokenType.ONLY)
2189            return self.expression(exp.Fetch, direction=direction, count=count)
2190
2191        return this
2192
2193    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2194        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2195            return this
2196
2197        count = self._parse_number()
2198        self._match_set((TokenType.ROW, TokenType.ROWS))
2199        return self.expression(exp.Offset, this=this, expression=count)
2200
2201    def _parse_lock(self) -> t.Optional[exp.Expression]:
2202        if self._match_text_seq("FOR", "UPDATE"):
2203            return self.expression(exp.Lock, update=True)
2204        if self._match_text_seq("FOR", "SHARE"):
2205            return self.expression(exp.Lock, update=False)
2206
2207        return None
2208
2209    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2210        if not self._match_set(self.SET_OPERATIONS):
2211            return this
2212
2213        token_type = self._prev.token_type
2214
2215        if token_type == TokenType.UNION:
2216            expression = exp.Union
2217        elif token_type == TokenType.EXCEPT:
2218            expression = exp.Except
2219        else:
2220            expression = exp.Intersect
2221
2222        return self.expression(
2223            expression,
2224            this=this,
2225            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2226            expression=self._parse_set_operations(self._parse_select(nested=True)),
2227        )
2228
2229    def _parse_expression(self) -> t.Optional[exp.Expression]:
2230        return self._parse_alias(self._parse_conjunction())
2231
2232    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2233        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2234
2235    def _parse_equality(self) -> t.Optional[exp.Expression]:
2236        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2237
2238    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2239        return self._parse_tokens(self._parse_range, self.COMPARISON)
2240
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN/IN/LIKE etc. via RANGE_PARSERS),
        plus ISNULL/NOTNULL shorthands and a trailing IS clause."""
        this = self._parse_bitwise()
        # A NOT here negates the following range predicate, e.g. NOT BETWEEN.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2263
2264    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2265        negate = self._match(TokenType.NOT)
2266        if self._match(TokenType.DISTINCT_FROM):
2267            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2268            return self.expression(klass, this=this, expression=self._parse_expression())
2269
2270        this = self.expression(
2271            exp.Is,
2272            this=this,
2273            expression=self._parse_null() or self._parse_boolean(),
2274        )
2275        return self.expression(exp.Not, this=this) if negate else this
2276
2277    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2278        unnest = self._parse_unnest()
2279        if unnest:
2280            this = self.expression(exp.In, this=this, unnest=unnest)
2281        elif self._match(TokenType.L_PAREN):
2282            expressions = self._parse_csv(self._parse_select_or_expression)
2283
2284            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2285                this = self.expression(exp.In, this=this, query=expressions[0])
2286            else:
2287                this = self.expression(exp.In, this=this, expressions=expressions)
2288
2289            self._match_r_paren()
2290        else:
2291            this = self.expression(exp.In, this=this, field=self._parse_field())
2292
2293        return this
2294
2295    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2296        low = self._parse_bitwise()
2297        self._match(TokenType.AND)
2298        high = self._parse_bitwise()
2299        return self.expression(exp.Between, this=this, low=low, high=high)
2300
2301    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2302        if not self._match(TokenType.ESCAPE):
2303            return this
2304        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2305
2306    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2307        this = self._parse_term()
2308
2309        while True:
2310            if self._match_set(self.BITWISE):
2311                this = self.expression(
2312                    self.BITWISE[self._prev.token_type],
2313                    this=this,
2314                    expression=self._parse_term(),
2315                )
2316            elif self._match_pair(TokenType.LT, TokenType.LT):
2317                this = self.expression(
2318                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2319                )
2320            elif self._match_pair(TokenType.GT, TokenType.GT):
2321                this = self.expression(
2322                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2323                )
2324            else:
2325                break
2326
2327        return this
2328
2329    def _parse_term(self) -> t.Optional[exp.Expression]:
2330        return self._parse_tokens(self._parse_factor, self.TERM)
2331
2332    def _parse_factor(self) -> t.Optional[exp.Expression]:
2333        return self._parse_tokens(self._parse_unary, self.FACTOR)
2334
2335    def _parse_unary(self) -> t.Optional[exp.Expression]:
2336        if self._match_set(self.UNARY_PARSERS):
2337            return self.UNARY_PARSERS[self._prev.token_type](self)
2338        return self._parse_at_time_zone(self._parse_type())
2339
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL, a `TYPE expr` implicit cast, or fall back to a column.

        Backtracks (via _retreat) when a type token turns out not to introduce
        a cast.
        """
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # A type followed by an expression is an implicit cast,
                # e.g. DATE '2020-01-01'.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare, unparameterized type with nothing after it: re-parse
                # the tokens as a plain column reference instead.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2357
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) type, e.g. INT, ARRAY<INT>, DECIMAL(10, 2).

        Args:
            check_func: when True, a type token that could also be a function
                call is only accepted as a type if a string literal follows;
                otherwise the parser backtracks and returns None.

        Returns:
            An exp.DataType (or PseudoType/Interval) node, or None after
            backtracking.
        """
        index = self._index

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized type parameters, e.g. DECIMAL(10, 2) or STRUCT(a INT).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: this wasn't a type after all, so backtrack.
                self._retreat(index)
                return None

            self._match_r_paren()
            # This could still be a function call with the same name.
            maybe_func = True

        # Postfix [] array syntax; each additional [] adds another ARRAY level.
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means a subscript/index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, MAP<K, V>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the TIME/TIMESTAMP WITH [LOCAL] TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat the tokens as a function
                # call instead, so rewind fully and give up on the type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
        )
2463
2464    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2465        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2466            return self._parse_types()
2467
2468        this = self._parse_id_var()
2469        self._match(TokenType.COLON)
2470        data_type = self._parse_types()
2471
2472        if not data_type:
2473            return None
2474        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2475
2476    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2477        if not self._match(TokenType.AT_TIME_ZONE):
2478            return this
2479        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2480
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted paths, `::` casts and
        bracket subscripts applied by the COLUMN_OPERATORS loop."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast: the next tokens must form a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Dialect-specific column operator: the next token is its
                # literal operand (number or string).
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.table:
                # a.b where `a` was parsed as a column: `b` is the column
                # and `a` becomes its table qualifier.
                this = self.expression(exp.Column, this=field, table=this.this)
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2523
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into one Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # A number like .5 tokenizes as DOT NUMBER; rebuild the literal.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                # (SELECT ...) can be followed by UNION/EXCEPT/INTERSECT.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                # Preserve any comments attached to the opening paren.
                this.comments = comments

            return this

        return None
2570
2571    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2572        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2573
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (or a no-paren function), else return None.

        Args:
            functions: name -> builder mapping to use; defaults to self.FUNCTIONS.

        Returns:
            The parsed function expression, possibly wrapped by a trailing
            window clause, or None when the tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following '(', only no-paren functions qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the '('

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...): wrap the subquery in the predicate.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an opaque Anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2635
2636    def _parse_user_defined_function(
2637        self, kind: t.Optional[TokenType] = None
2638    ) -> t.Optional[exp.Expression]:
2639        this = self._parse_id_var()
2640
2641        while self._match(TokenType.DOT):
2642            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2643
2644        if not self._match(TokenType.L_PAREN):
2645            return this
2646
2647        expressions = self._parse_csv(self._parse_udf_kwarg)
2648        self._match_r_paren()
2649        return self.expression(
2650            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2651        )
2652
2653    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2654        literal = self._parse_primary()
2655        if literal:
2656            return self.expression(exp.Introducer, this=token.text, expression=literal)
2657
2658        return self.expression(exp.Identifier, this=token.text)
2659
2660    def _parse_national(self, token: Token) -> exp.Expression:
2661        return self.expression(exp.National, this=exp.Literal.string(token.text))
2662
2663    def _parse_session_parameter(self) -> exp.Expression:
2664        kind = None
2665        this = self._parse_id_var() or self._parse_primary()
2666
2667        if this and self._match(TokenType.DOT):
2668            kind = this.name
2669            this = self._parse_var() or self._parse_primary()
2670
2671        return self.expression(exp.SessionParameter, this=this, kind=kind)
2672
2673    def _parse_udf_kwarg(self) -> t.Optional[exp.Expression]:
2674        this = self._parse_id_var()
2675        kind = self._parse_types()
2676
2677        if not kind:
2678            return this
2679
2680        return self.expression(exp.UserDefinedFunctionKwarg, this=this, kind=kind)
2681
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), falling back to a regular
        (possibly DISTINCT / IGNORE NULLS / ordered / limited) expression when
        no lambda operator follows the candidate parameter list."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # Dialect-specific lambda constructor gets the parameter list.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator found: undo everything consumed above and reparse
        # the same tokens as an ordinary expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        # Handles FIRST_VALUE(x IGNORE|RESPECT NULLS); RESPECT NULLS is the
        # default, so it is consumed without wrapping.
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        # Aggregate arguments may carry ORDER BY / LIMIT (e.g. GROUP_CONCAT).
        return self._parse_limit(self._parse_order(this))
2713
2714    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2715        index = self._index
2716        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2717            self._retreat(index)
2718            return this
2719
2720        args = self._parse_csv(
2721            lambda: self._parse_constraint()
2722            or self._parse_column_def(self._parse_field(any_token=True))
2723        )
2724        self._match_r_paren()
2725        return self.expression(exp.Schema, this=this, expressions=args)
2726
2727    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2728        kind = self._parse_types()
2729
2730        constraints = []
2731        while True:
2732            constraint = self._parse_column_constraint()
2733            if not constraint:
2734                break
2735            constraints.append(constraint)
2736
2737        if not kind and not constraints:
2738            return this
2739
2740        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2741
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single column-level constraint; returns the optional
        CONSTRAINT name (possibly None) when no constraint keyword follows."""
        this = self._parse_references()

        if this:
            return this

        # Optional name: CONSTRAINT <name> <kind>.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        kind: exp.Expression

        if self._match_set((TokenType.AUTO_INCREMENT, TokenType.IDENTITY)):
            start = None
            increment = None

            # Either IDENTITY(start, increment) or ... START x INCREMENT y.
            if self._match(TokenType.L_PAREN, advance=False):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                start = seq_get(args, 0)
                increment = seq_get(args, 1)
            elif self._match_text_seq("START"):
                start = self._parse_bitwise()
                self._match_text_seq("INCREMENT")
                increment = self._parse_bitwise()

            if start and increment:
                kind = exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
            else:
                kind = exp.AutoIncrementColumnConstraint()
        elif self._match(TokenType.CHECK):
            constraint = self._parse_wrapped(self._parse_conjunction)
            kind = self.expression(exp.CheckColumnConstraint, this=constraint)
        elif self._match(TokenType.COLLATE):
            kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.ENCODE):
            kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.DEFAULT):
            kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_bitwise())
        elif self._match_pair(TokenType.NOT, TokenType.NULL):
            kind = exp.NotNullColumnConstraint()
        elif self._match(TokenType.NULL):
            # Explicit NULL is modeled as a NOT NULL constraint that allows nulls.
            kind = exp.NotNullColumnConstraint(allow_null=True)
        elif self._match(TokenType.SCHEMA_COMMENT):
            kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string())
        elif self._match(TokenType.PRIMARY_KEY):
            desc = None
            if self._match(TokenType.ASC) or self._match(TokenType.DESC):
                desc = self._prev.token_type == TokenType.DESC
            kind = exp.PrimaryKeyColumnConstraint(desc=desc)
        elif self._match(TokenType.UNIQUE):
            kind = exp.UniqueColumnConstraint()
        elif self._match(TokenType.GENERATED):
            # GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(START WITH x INCREMENT BY y)]
            if self._match(TokenType.BY_DEFAULT):
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
            else:
                self._match(TokenType.ALWAYS)
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
            self._match_pair(TokenType.ALIAS, TokenType.IDENTITY)

            if self._match(TokenType.L_PAREN):
                if self._match_text_seq("START", "WITH"):
                    kind.set("start", self._parse_bitwise())
                if self._match_text_seq("INCREMENT", "BY"):
                    kind.set("increment", self._parse_bitwise())

                self._match_r_paren()
        else:
            # No constraint keyword: hand back the (possibly None) name.
            return this

        return self.expression(exp.ColumnConstraint, this=this, kind=kind)
2811
2812    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2813        if not self._match(TokenType.CONSTRAINT):
2814            return self._parse_unnamed_constraint()
2815
2816        this = self._parse_id_var()
2817        expressions = []
2818
2819        while True:
2820            constraint = self._parse_unnamed_constraint() or self._parse_function()
2821            if not constraint:
2822                break
2823            expressions.append(constraint)
2824
2825        return self.expression(exp.Constraint, this=this, expressions=expressions)
2826
2827    def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]:
2828        if not self._match_set(self.CONSTRAINT_PARSERS):
2829            return None
2830        return self.CONSTRAINT_PARSERS[self._prev.token_type](self)
2831
2832    def _parse_unique(self) -> exp.Expression:
2833        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
2834
2835    def _parse_key_constraint_options(self) -> t.List[str]:
2836        options = []
2837        while True:
2838            if not self._curr:
2839                break
2840
2841            if self._match(TokenType.ON):
2842                action = None
2843                on = self._advance_any() and self._prev.text
2844
2845                if self._match(TokenType.NO_ACTION):
2846                    action = "NO ACTION"
2847                elif self._match(TokenType.CASCADE):
2848                    action = "CASCADE"
2849                elif self._match_pair(TokenType.SET, TokenType.NULL):
2850                    action = "SET NULL"
2851                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
2852                    action = "SET DEFAULT"
2853                else:
2854                    self.raise_error("Invalid key constraint")
2855
2856                options.append(f"ON {on} {action}")
2857            elif self._match_text_seq("NOT", "ENFORCED"):
2858                options.append("NOT ENFORCED")
2859            elif self._match_text_seq("DEFERRABLE"):
2860                options.append("DEFERRABLE")
2861            elif self._match_text_seq("INITIALLY", "DEFERRED"):
2862                options.append("INITIALLY DEFERRED")
2863            elif self._match_text_seq("NORELY"):
2864                options.append("NORELY")
2865            elif self._match_text_seq("MATCH", "FULL"):
2866                options.append("MATCH FULL")
2867            else:
2868                break
2869
2870        return options
2871
2872    def _parse_references(self) -> t.Optional[exp.Expression]:
2873        if not self._match(TokenType.REFERENCES):
2874            return None
2875
2876        expressions = None
2877        this = self._parse_id_var()
2878
2879        if self._match(TokenType.L_PAREN, advance=False):
2880            expressions = self._parse_wrapped_id_vars()
2881
2882        options = self._parse_key_constraint_options()
2883        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
2884
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE action]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete" / "update" to its action string; forwarded as kwargs.
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # SET NULL / SET DEFAULT; _prev holds the keyword just matched.
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token actions such as CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
2910
2911    def _parse_primary_key(self) -> exp.Expression:
2912        expressions = self._parse_wrapped_id_vars()
2913        options = self._parse_key_constraint_options()
2914        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
2915
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` (array literal / index / slice) or `{...}` (struct)
        following `this`; brackets may be chained, e.g. x[0][1]."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:5].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect-specific index bases to the canonical offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to consume any chained bracket.
        return self._parse_bracket(this)
2944
2945    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2946        if self._match(TokenType.COLON):
2947            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
2948        return this
2949
2950    def _parse_case(self) -> t.Optional[exp.Expression]:
2951        ifs = []
2952        default = None
2953
2954        expression = self._parse_conjunction()
2955
2956        while self._match(TokenType.WHEN):
2957            this = self._parse_conjunction()
2958            self._match(TokenType.THEN)
2959            then = self._parse_conjunction()
2960            ifs.append(self.expression(exp.If, this=this, true=then))
2961
2962        if self._match(TokenType.ELSE):
2963            default = self._parse_conjunction()
2964
2965        if not self._match(TokenType.END):
2966            self.raise_error("Expected END after CASE", self._prev)
2967
2968        return self._parse_window(
2969            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
2970        )
2971
2972    def _parse_if(self) -> t.Optional[exp.Expression]:
2973        if self._match(TokenType.L_PAREN):
2974            args = self._parse_csv(self._parse_conjunction)
2975            this = exp.If.from_arg_list(args)
2976            self.validate_expression(this, args)
2977            self._match_r_paren()
2978        else:
2979            condition = self._parse_conjunction()
2980            self._match(TokenType.THEN)
2981            true = self._parse_conjunction()
2982            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
2983            self._match(TokenType.END)
2984            this = self.expression(exp.If, this=condition, true=true, false=false)
2985
2986        return self._parse_window(this)
2987
2988    def _parse_extract(self) -> exp.Expression:
2989        this = self._parse_function() or self._parse_var() or self._parse_type()
2990
2991        if self._match(TokenType.FROM):
2992            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
2993
2994        if not self._match(TokenType.COMMA):
2995            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
2996
2997        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
2998
2999    def _parse_cast(self, strict: bool) -> exp.Expression:
3000        this = self._parse_conjunction()
3001
3002        if not self._match(TokenType.ALIAS):
3003            self.raise_error("Expected AS after CAST")
3004
3005        to = self._parse_types()
3006
3007        if not to:
3008            self.raise_error("Expected TYPE after CAST")
3009        elif to.this == exp.DataType.Type.CHAR:
3010            if self._match(TokenType.CHARACTER_SET):
3011                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3012
3013        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3014
3015    def _parse_string_agg(self) -> exp.Expression:
3016        expression: t.Optional[exp.Expression]
3017
3018        if self._match(TokenType.DISTINCT):
3019            args = self._parse_csv(self._parse_conjunction)
3020            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
3021        else:
3022            args = self._parse_csv(self._parse_conjunction)
3023            expression = seq_get(args, 0)
3024
3025        index = self._index
3026        if not self._match(TokenType.R_PAREN):
3027            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
3028            order = self._parse_order(this=expression)
3029            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3030
3031        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
3032        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
3033        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
3034        if not self._match(TokenType.WITHIN_GROUP):
3035            self._retreat(index)
3036            this = exp.GroupConcat.from_arg_list(args)
3037            self.validate_expression(this, args)
3038            return this
3039
3040        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
3041        order = self._parse_order(this=expression)
3042        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3043
3044    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3045        to: t.Optional[exp.Expression]
3046        this = self._parse_column()
3047
3048        if self._match(TokenType.USING):
3049            to = self.expression(exp.CharacterSet, this=self._parse_var())
3050        elif self._match(TokenType.COMMA):
3051            to = self._parse_types()
3052        else:
3053            to = None
3054
3055        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3056
3057    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3058        args = self._parse_csv(self._parse_bitwise)
3059
3060        if self._match(TokenType.IN):
3061            return self.expression(
3062                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3063            )
3064
3065        if haystack_first:
3066            haystack = seq_get(args, 0)
3067            needle = seq_get(args, 1)
3068        else:
3069            needle = seq_get(args, 0)
3070            haystack = seq_get(args, 1)
3071
3072        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3073
3074        self.validate_expression(this, args)
3075
3076        return this
3077
3078    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3079        args = self._parse_csv(self._parse_table)
3080        return exp.JoinHint(this=func_name.upper(), expressions=args)
3081
3082    def _parse_substring(self) -> exp.Expression:
3083        # Postgres supports the form: substring(string [from int] [for int])
3084        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3085
3086        args = self._parse_csv(self._parse_bitwise)
3087
3088        if self._match(TokenType.FROM):
3089            args.append(self._parse_bitwise())
3090            if self._match(TokenType.FOR):
3091                args.append(self._parse_bitwise())
3092
3093        this = exp.Substring.from_arg_list(args)
3094        self.validate_expression(this, args)
3095
3096        return this
3097
3098    def _parse_trim(self) -> exp.Expression:
3099        # https://www.w3resource.com/sql/character-functions/trim.php
3100        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3101
3102        position = None
3103        collation = None
3104
3105        if self._match_set(self.TRIM_TYPES):
3106            position = self._prev.text.upper()
3107
3108        expression = self._parse_term()
3109        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3110            this = self._parse_term()
3111        else:
3112            this = expression
3113            expression = None
3114
3115        if self._match(TokenType.COLLATE):
3116            collation = self._parse_term()
3117
3118        return self.expression(
3119            exp.Trim,
3120            this=this,
3121            position=position,
3122            expression=expression,
3123            collation=collation,
3124        )
3125
3126    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3127        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3128
3129    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3130        return self._parse_window(self._parse_id_var(), alias=True)
3131
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call: FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and OVER (...). With ``alias=True``,
        parses a named window definition (name AS (...)) instead."""
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        # OVER window_name (a reference, not a parenthesized spec).
        if not self._match(TokenType.L_PAREN):
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        # Inside the parens: [base_window_name] [PARTITION BY ...] [ORDER BY ...]
        # [ROWS|RANGE frame_spec].
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3205
3206    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3207        self._match(TokenType.BETWEEN)
3208
3209        return {
3210            "value": (
3211                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3212            )
3213            or self._parse_bitwise(),
3214            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3215        }
3216
3217    def _parse_alias(
3218        self, this: t.Optional[exp.Expression], explicit: bool = False
3219    ) -> t.Optional[exp.Expression]:
3220        any_token = self._match(TokenType.ALIAS)
3221
3222        if explicit and not any_token:
3223            return this
3224
3225        if self._match(TokenType.L_PAREN):
3226            aliases = self.expression(
3227                exp.Aliases,
3228                this=this,
3229                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3230            )
3231            self._match_r_paren(aliases)
3232            return aliases
3233
3234        alias = self._parse_id_var(any_token)
3235
3236        if alias:
3237            return self.expression(exp.Alias, this=this, alias=alias)
3238
3239        return this
3240
3241    def _parse_id_var(
3242        self,
3243        any_token: bool = True,
3244        tokens: t.Optional[t.Collection[TokenType]] = None,
3245        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3246    ) -> t.Optional[exp.Expression]:
3247        identifier = self._parse_identifier()
3248
3249        if identifier:
3250            return identifier
3251
3252        prefix = ""
3253
3254        if prefix_tokens:
3255            while self._match_set(prefix_tokens):
3256                prefix += self._prev.text
3257
3258        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3259            quoted = self._prev.token_type == TokenType.STRING
3260            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3261
3262        return None
3263
3264    def _parse_string(self) -> t.Optional[exp.Expression]:
3265        if self._match(TokenType.STRING):
3266            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3267        return self._parse_placeholder()
3268
3269    def _parse_number(self) -> t.Optional[exp.Expression]:
3270        if self._match(TokenType.NUMBER):
3271            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3272        return self._parse_placeholder()
3273
3274    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3275        if self._match(TokenType.IDENTIFIER):
3276            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3277        return self._parse_placeholder()
3278
3279    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3280        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3281            return self.expression(exp.Var, this=self._prev.text)
3282        return self._parse_placeholder()
3283
3284    def _advance_any(self) -> t.Optional[Token]:
3285        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3286            self._advance()
3287            return self._prev
3288        return None
3289
3290    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3291        return self._parse_var() or self._parse_string()
3292
3293    def _parse_null(self) -> t.Optional[exp.Expression]:
3294        if self._match(TokenType.NULL):
3295            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3296        return None
3297
3298    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3299        if self._match(TokenType.TRUE):
3300            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3301        if self._match(TokenType.FALSE):
3302            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3303        return None
3304
3305    def _parse_star(self) -> t.Optional[exp.Expression]:
3306        if self._match(TokenType.STAR):
3307            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3308        return None
3309
3310    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3311        if self._match_set(self.PLACEHOLDER_PARSERS):
3312            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3313            if placeholder:
3314                return placeholder
3315            self._advance(-1)
3316        return None
3317
3318    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3319        if not self._match(TokenType.EXCEPT):
3320            return None
3321        if self._match(TokenType.L_PAREN, advance=False):
3322            return self._parse_wrapped_csv(self._parse_column)
3323        return self._parse_csv(self._parse_column)
3324
3325    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3326        if not self._match(TokenType.REPLACE):
3327            return None
3328        if self._match(TokenType.L_PAREN, advance=False):
3329            return self._parse_wrapped_csv(self._parse_expression)
3330        return self._parse_csv(self._parse_expression)
3331
3332    def _parse_csv(
3333        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3334    ) -> t.List[t.Optional[exp.Expression]]:
3335        parse_result = parse_method()
3336        items = [parse_result] if parse_result is not None else []
3337
3338        while self._match(sep):
3339            if parse_result and self._prev_comments:
3340                parse_result.comments = self._prev_comments
3341
3342            parse_result = parse_method()
3343            if parse_result is not None:
3344                items.append(parse_result)
3345
3346        return items
3347
3348    def _parse_tokens(
3349        self, parse_method: t.Callable, expressions: t.Dict
3350    ) -> t.Optional[exp.Expression]:
3351        this = parse_method()
3352
3353        while self._match_set(expressions):
3354            this = self.expression(
3355                expressions[self._prev.token_type],
3356                this=this,
3357                comments=self._prev_comments,
3358                expression=parse_method(),
3359            )
3360
3361        return this
3362
3363    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3364        return self._parse_wrapped_csv(self._parse_id_var)
3365
3366    def _parse_wrapped_csv(
3367        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3368    ) -> t.List[t.Optional[exp.Expression]]:
3369        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3370
3371    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3372        self._match_l_paren()
3373        parse_result = parse_method()
3374        self._match_r_paren()
3375        return parse_result
3376
3377    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3378        return self._parse_select() or self._parse_expression()
3379
3380    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3381        return self._parse_set_operations(
3382            self._parse_select(nested=True, parse_subquery_alias=False)
3383        )
3384
3385    def _parse_transaction(self) -> exp.Expression:
3386        this = None
3387        if self._match_texts(self.TRANSACTION_KIND):
3388            this = self._prev.text
3389
3390        self._match_texts({"TRANSACTION", "WORK"})
3391
3392        modes = []
3393        while True:
3394            mode = []
3395            while self._match(TokenType.VAR):
3396                mode.append(self._prev.text)
3397
3398            if mode:
3399                modes.append(" ".join(mode))
3400            if not self._match(TokenType.COMMA):
3401                break
3402
3403        return self.expression(exp.Transaction, this=this, modes=modes)
3404
3405    def _parse_commit_or_rollback(self) -> exp.Expression:
3406        chain = None
3407        savepoint = None
3408        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3409
3410        self._match_texts({"TRANSACTION", "WORK"})
3411
3412        if self._match_text_seq("TO"):
3413            self._match_text_seq("SAVEPOINT")
3414            savepoint = self._parse_id_var()
3415
3416        if self._match(TokenType.AND):
3417            chain = not self._match_text_seq("NO")
3418            self._match_text_seq("CHAIN")
3419
3420        if is_rollback:
3421            return self.expression(exp.Rollback, savepoint=savepoint)
3422        return self.expression(exp.Commit, chain=chain)
3423
3424    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3425        if not self._match_text_seq("ADD"):
3426            return None
3427
3428        self._match(TokenType.COLUMN)
3429        exists_column = self._parse_exists(not_=True)
3430        expression = self._parse_column_def(self._parse_field(any_token=True))
3431
3432        if expression:
3433            expression.set("exists", exists_column)
3434
3435        return expression
3436
3437    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3438        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3439
3440    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3441    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3442        return self.expression(
3443            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3444        )
3445
3446    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3447        this = None
3448        kind = self._prev.token_type
3449
3450        if kind == TokenType.CONSTRAINT:
3451            this = self._parse_id_var()
3452
3453            if self._match(TokenType.CHECK):
3454                expression = self._parse_wrapped(self._parse_conjunction)
3455                enforced = self._match_text_seq("ENFORCED")
3456
3457                return self.expression(
3458                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3459                )
3460
3461        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3462            expression = self._parse_foreign_key()
3463        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3464            expression = self._parse_primary_key()
3465
3466        return self.expression(exp.AddConstraint, this=this, expression=expression)
3467
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """
        Parse an ALTER statement. Only ALTER TABLE is parsed into a structured
        AlterTable node; any other ALTER target falls back to a raw Command.
        """
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # Remember the position so the speculative matches below can rewind.
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column re-consumes "ADD".
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition drop: rewind so _parse_drop_column re-consumes "DROP".
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3520
3521    def _parse_show(self) -> t.Optional[exp.Expression]:
3522        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3523        if parser:
3524            return parser(self)
3525        self._advance()
3526        return self.expression(exp.Show, this=self._prev.text.upper())
3527
3528    def _default_parse_set_item(self) -> exp.Expression:
3529        return self.expression(
3530            exp.SetItem,
3531            this=self._parse_statement(),
3532        )
3533
3534    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3535        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3536        return parser(self) if parser else self._default_parse_set_item()
3537
3538    def _parse_merge(self) -> exp.Expression:
3539        self._match(TokenType.INTO)
3540        target = self._parse_table()
3541
3542        self._match(TokenType.USING)
3543        using = self._parse_table()
3544
3545        self._match(TokenType.ON)
3546        on = self._parse_conjunction()
3547
3548        whens = []
3549        while self._match(TokenType.WHEN):
3550            this = self._parse_conjunction()
3551            self._match(TokenType.THEN)
3552
3553            if self._match(TokenType.INSERT):
3554                _this = self._parse_star()
3555                if _this:
3556                    then = self.expression(exp.Insert, this=_this)
3557                else:
3558                    then = self.expression(
3559                        exp.Insert,
3560                        this=self._parse_value(),
3561                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3562                    )
3563            elif self._match(TokenType.UPDATE):
3564                expressions = self._parse_star()
3565                if expressions:
3566                    then = self.expression(exp.Update, expressions=expressions)
3567                else:
3568                    then = self.expression(
3569                        exp.Update,
3570                        expressions=self._match(TokenType.SET)
3571                        and self._parse_csv(self._parse_equality),
3572                    )
3573            elif self._match(TokenType.DELETE):
3574                then = self.expression(exp.Var, this=self._prev.text)
3575
3576            whens.append(self.expression(exp.When, this=this, then=then))
3577
3578        return self.expression(
3579            exp.Merge,
3580            this=target,
3581            using=using,
3582            on=on,
3583            expressions=whens,
3584        )
3585
3586    def _parse_set(self) -> exp.Expression:
3587        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3588
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remainder of the statement verbatim and wrap it in a Command node."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))
3593
3594    def _find_parser(
3595        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3596    ) -> t.Optional[t.Callable]:
3597        index = self._index
3598        this = []
3599        while True:
3600            # The current token might be multiple words
3601            curr = self._curr.text.upper()
3602            key = curr.split(" ")
3603            this.append(curr)
3604            self._advance()
3605            result, trie = in_trie(trie, key)
3606            if result == 0:
3607                break
3608            if result == 2:
3609                subparser = parsers[" ".join(this)]
3610                return subparser
3611        self._retreat(index)
3612        return None
3613
3614    def _match(self, token_type, advance=True):
3615        if not self._curr:
3616            return None
3617
3618        if self._curr.token_type == token_type:
3619            if advance:
3620                self._advance()
3621            return True
3622
3623        return None
3624
3625    def _match_set(self, types):
3626        if not self._curr:
3627            return None
3628
3629        if self._curr.token_type in types:
3630            self._advance()
3631            return True
3632
3633        return None
3634
3635    def _match_pair(self, token_type_a, token_type_b, advance=True):
3636        if not self._curr or not self._next:
3637            return None
3638
3639        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3640            if advance:
3641                self._advance(2)
3642            return True
3643
3644        return None
3645
3646    def _match_l_paren(self, expression=None):
3647        if not self._match(TokenType.L_PAREN):
3648            self.raise_error("Expecting (")
3649        if expression and self._prev_comments:
3650            expression.comments = self._prev_comments
3651
3652    def _match_r_paren(self, expression=None):
3653        if not self._match(TokenType.R_PAREN):
3654            self.raise_error("Expecting )")
3655        if expression and self._prev_comments:
3656            expression.comments = self._prev_comments
3657
3658    def _match_texts(self, texts):
3659        if self._curr and self._curr.text.upper() in texts:
3660            self._advance()
3661            return True
3662        return False
3663
3664    def _match_text_seq(self, *texts, advance=True):
3665        index = self._index
3666        for text in texts:
3667            if self._curr and self._curr.text.upper() == text:
3668                self._advance()
3669            else:
3670                self._retreat(index)
3671                return False
3672
3673        if not advance:
3674            self._retreat(index)
3675
3676        return True
3677
    def _replace_columns_with_dots(self, this):
        """
        Recursively rewrite Column/Identifier nodes into Dot/Var chains, for
        contexts that expect a dotted variable path rather than column refs.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then replace the column node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
3692
3693    def _replace_lambda(self, node, lambda_variables):
3694        if isinstance(node, exp.Column):
3695            if node.name in lambda_variables:
3696                return node.this
3697        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
667    def __init__(
668        self,
669        error_level: t.Optional[ErrorLevel] = None,
670        error_message_context: int = 100,
671        index_offset: int = 0,
672        unnest_column_only: bool = False,
673        alias_post_tablesample: bool = False,
674        max_errors: int = 3,
675        null_ordering: t.Optional[str] = None,
676    ):
677        self.error_level = error_level or ErrorLevel.IMMEDIATE
678        self.error_message_context = error_message_context
679        self.index_offset = index_offset
680        self.unnest_column_only = unnest_column_only
681        self.alias_post_tablesample = alias_post_tablesample
682        self.max_errors = max_errors
683        self.null_ordering = null_ordering
684        self.reset()
def reset(self):
686    def reset(self):
687        self.sql = ""
688        self.errors = []
689        self._tokens = []
690        self._index = 0
691        self._curr = None
692        self._next = None
693        self._prev = None
694        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
696    def parse(
697        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
698    ) -> t.List[t.Optional[exp.Expression]]:
699        """
700        Parses a list of tokens and returns a list of syntax trees, one tree
701        per parsed SQL statement.
702
703        Args:
704            raw_tokens: the list of tokens.
705            sql: the original SQL string, used to produce helpful debug messages.
706
707        Returns:
708            The list of syntax trees.
709        """
710        return self._parse(
711            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
712        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
714    def parse_into(
715        self,
716        expression_types: exp.IntoType,
717        raw_tokens: t.List[Token],
718        sql: t.Optional[str] = None,
719    ) -> t.List[t.Optional[exp.Expression]]:
720        """
721        Parses a list of tokens into a given Expression type. If a collection of Expression
722        types is given instead, this method will try to parse the token list into each one
723        of them, stopping at the first for which the parsing succeeds.
724
725        Args:
726            expression_types: the expression type(s) to try and parse the token list into.
727            raw_tokens: the list of tokens.
728            sql: the original SQL string, used to produce helpful debug messages.
729
730        Returns:
731            The target Expression.
732        """
733        errors = []
734        for expression_type in ensure_collection(expression_types):
735            parser = self.EXPRESSION_PARSERS.get(expression_type)
736            if not parser:
737                raise TypeError(f"No parser registered for {expression_type}")
738            try:
739                return self._parse(parser, raw_tokens, sql)
740            except ParseError as e:
741                e.errors[0]["into_expression"] = expression_type
742                errors.append(e)
743        raise ParseError(
744            f"Failed to parse into {expression_types}",
745            errors=merge_errors(errors),
746        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
782    def check_errors(self) -> None:
783        """
784        Logs or raises any found errors, depending on the chosen error level setting.
785        """
786        if self.error_level == ErrorLevel.WARN:
787            for error in self.errors:
788                logger.error(str(error))
789        elif self.error_level == ErrorLevel.RAISE and self.errors:
790            raise ParseError(
791                concat_messages(self.errors, self.max_errors),
792                errors=merge_errors(self.errors),
793            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
795    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
796        """
797        Appends an error in the list of recorded errors or raises it, depending on the chosen
798        error level setting.
799        """
800        token = token or self._curr or self._prev or Token.string("")
801        start = self._find_token(token)
802        end = start + len(token.text)
803        start_context = self.sql[max(start - self.error_message_context, 0) : start]
804        highlight = self.sql[start:end]
805        end_context = self.sql[end : end + self.error_message_context]
806
807        error = ParseError.new(
808            f"{message}. Line {token.line}, Col: {token.col}.\n"
809            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
810            description=message,
811            line=token.line,
812            col=token.col,
813            start_context=start_context,
814            highlight=highlight,
815            end_context=end_context,
816        )
817
818        if self.error_level == ErrorLevel.IMMEDIATE:
819            raise error
820
821        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[sqlglot.expressions.Expression], comments: Optional[List[str]] = None, **kwargs) -> sqlglot.expressions.Expression:
823    def expression(
824        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
825    ) -> exp.Expression:
826        """
827        Creates a new, validated Expression.
828
829        Args:
830            exp_class: the expression class to instantiate.
831            comments: an optional list of comments to attach to the expression.
832            kwargs: the arguments to set for the expression along with their respective values.
833
834        Returns:
835            The target expression.
836        """
837        instance = exp_class(**kwargs)
838        if self._prev_comments:
839            instance.comments = self._prev_comments
840            self._prev_comments = None
841        if comments:
842            instance.comments = comments
843        self.validate_expression(instance)
844        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
846    def validate_expression(
847        self, expression: exp.Expression, args: t.Optional[t.List] = None
848    ) -> None:
849        """
850        Validates an already instantiated expression, making sure that all its mandatory arguments
851        are set.
852
853        Args:
854            expression: the expression to validate.
855            args: an optional list of items that was used to instantiate the expression, if it's a Func.
856        """
857        if self.error_level == ErrorLevel.IGNORE:
858            return
859
860        for error_message in expression.error_messages(args):
861            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.