Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import (
  10    apply_index_offset,
  11    count_params,
  12    ensure_collection,
  13    ensure_list,
  14    seq_get,
  15)
  16from sqlglot.tokens import Token, Tokenizer, TokenType
  17from sqlglot.trie import in_trie, new_trie
  18
  19logger = logging.getLogger("sqlglot")
  20
  21
  22def parse_var_map(args):
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34def binary_range_parser(
  35    expr_type: t.Type[exp.Expression],
  36) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  37    return lambda self, this: self._parse_escape(
  38        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  39    )
  40
  41
  42class _Parser(type):
  43    def __new__(cls, clsname, bases, attrs):
  44        klass = super().__new__(cls, clsname, bases, attrs)
  45        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  46        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  47
  48        if not klass.INTEGER_DIVISION:
  49            klass.FACTOR = {**klass.FACTOR, TokenType.SLASH: exp.FloatDiv}
  50
  51        return klass
  52
  53
  54class Parser(metaclass=_Parser):
  55    """
  56    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  57    a parsed syntax tree.
  58
  59    Args:
  60        error_level: the desired error level.
  61            Default: ErrorLevel.RAISE
  62        error_message_context: determines the amount of context to capture from a
  63            query string when displaying the error message (in number of characters).
  64            Default: 50.
  65        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  66            Default: 0
  67        alias_post_tablesample: If the table alias comes after tablesample.
  68            Default: False
  69        max_errors: Maximum number of error messages to include in a raised ParseError.
  70            This is only relevant if error_level is ErrorLevel.RAISE.
  71            Default: 3
  72        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  73            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  74            Default: "nulls_are_small"
  75    """
  76
    # Maps SQL function names to callables that build the corresponding
    # expression node from a parsed argument list. The unpacked comprehension
    # registers every built-in function under each of its SQL names; the
    # explicit entries below add canonical rewrites.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        # DATE_TO_DATE_STR(x) => CAST(x AS TEXT)
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # TIME_TO_TIME_STR(x) => CAST(x AS TEXT)
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # TS_OR_DS_TO_DATE_STR(x) => SUBSTRING(CAST(x AS TEXT), 1, 10)
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        # IFNULL is normalized to COALESCE.
        "IFNULL": exp.Coalesce.from_arg_list,
    }
  98
    # Functions that may appear without trailing parentheses.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate here — confirm intentional.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Type tokens that can parameterize other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # All tokens that may introduce a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery, e.g. EXISTS (SELECT ...). Note that
    # SOME is treated as a synonym of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 172
    # Objects creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Everything that can follow CREATE/DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that, despite being keywords, may also be used as identifiers.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.IF,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens allowed as table aliases — keywords that would be ambiguous right
    # after a table reference are excluded.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # SET would be ambiguous in UPDATE <alias> SET ..., so it is excluded too.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # TRIM([LEADING|TRAILING|BOTH] ... FROM ...)
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
 306
    # Binary operator tables, one per precedence tier. Each maps an operator
    # token to the expression node the parser should build for it.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Note: SLASH maps to exp.Div here; the metaclass rebinds it to
    # exp.FloatDiv when INTEGER_DIVISION is False.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `args -> body` builds exp.Lambda (rewriting references
    # to the bound names), and `name => value` builds exp.Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
 387
    # Operators that can follow a column reference, e.g. `::` casts and the
    # JSON/JSONB arrow operators. DOT maps to None because it is handled
    # specially by the column parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by `parse_into`: maps a target Expression type to the
    # parser method that produces it.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }
 449
    # Dispatch table for statement-leading keywords.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE|WAREHOUSE|DATABASE|SCHEMA] <name>; the optional kind is
        # captured from the matched keyword text.
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary-expression tokens; each receives the consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/membership operators that can follow a comparison operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 525
    # Dispatch table for CREATE-statement property keywords. Several entries
    # inspect self._prev.text because the dispatching keyword (e.g. NO, DUAL,
    # DEFAULT) has already been consumed when the lambda runs.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }
 604
    # Dispatch table for column-constraint keywords.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Dispatch table for the action keyword following ALTER TABLE <name>.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }
 652
    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs written without parentheses around arguments.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (special syntax
    # such as CAST(x AS type) or EXTRACT(field FROM expr)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Clauses that can modify a query (WHERE, GROUP BY, LIMIT, ...), keyed by
    # the arg name they populate on the query expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Empty by default; dialects populate these, and the metaclass builds
    # keyword tries (_show_trie/_set_trie) from them.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that accept query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior switches; subclasses override these.
    STRICT_CAST = True

    INTEGER_DIVISION = True

    CONVERT_TYPE_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 728
 729    def __init__(
 730        self,
 731        error_level: t.Optional[ErrorLevel] = None,
 732        error_message_context: int = 100,
 733        index_offset: int = 0,
 734        unnest_column_only: bool = False,
 735        alias_post_tablesample: bool = False,
 736        max_errors: int = 3,
 737        null_ordering: t.Optional[str] = None,
 738    ):
 739        self.error_level = error_level or ErrorLevel.IMMEDIATE
 740        self.error_message_context = error_message_context
 741        self.index_offset = index_offset
 742        self.unnest_column_only = unnest_column_only
 743        self.alias_post_tablesample = alias_post_tablesample
 744        self.max_errors = max_errors
 745        self.null_ordering = null_ordering
 746        self.reset()
 747
 748    def reset(self):
 749        self.sql = ""
 750        self.errors = []
 751        self._tokens = []
 752        self._index = 0
 753        self._curr = None
 754        self._next = None
 755        self._prev = None
 756        self._prev_comments = None
 757
 758    def parse(
 759        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 760    ) -> t.List[t.Optional[exp.Expression]]:
 761        """
 762        Parses a list of tokens and returns a list of syntax trees, one tree
 763        per parsed SQL statement.
 764
 765        Args:
 766            raw_tokens: the list of tokens.
 767            sql: the original SQL string, used to produce helpful debug messages.
 768
 769        Returns:
 770            The list of syntax trees.
 771        """
 772        return self._parse(
 773            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 774        )
 775
 776    def parse_into(
 777        self,
 778        expression_types: exp.IntoType,
 779        raw_tokens: t.List[Token],
 780        sql: t.Optional[str] = None,
 781    ) -> t.List[t.Optional[exp.Expression]]:
 782        """
 783        Parses a list of tokens into a given Expression type. If a collection of Expression
 784        types is given instead, this method will try to parse the token list into each one
 785        of them, stopping at the first for which the parsing succeeds.
 786
 787        Args:
 788            expression_types: the expression type(s) to try and parse the token list into.
 789            raw_tokens: the list of tokens.
 790            sql: the original SQL string, used to produce helpful debug messages.
 791
 792        Returns:
 793            The target Expression.
 794        """
 795        errors = []
 796        for expression_type in ensure_collection(expression_types):
 797            parser = self.EXPRESSION_PARSERS.get(expression_type)
 798            if not parser:
 799                raise TypeError(f"No parser registered for {expression_type}")
 800            try:
 801                return self._parse(parser, raw_tokens, sql)
 802            except ParseError as e:
 803                e.errors[0]["into_expression"] = expression_type
 804                errors.append(e)
 805        raise ParseError(
 806            f"Failed to parse into {expression_types}",
 807            errors=merge_errors(errors),
 808        ) from errors[-1]
 809
 810    def _parse(
 811        self,
 812        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 813        raw_tokens: t.List[Token],
 814        sql: t.Optional[str] = None,
 815    ) -> t.List[t.Optional[exp.Expression]]:
 816        self.reset()
 817        self.sql = sql or ""
 818        total = len(raw_tokens)
 819        chunks: t.List[t.List[Token]] = [[]]
 820
 821        for i, token in enumerate(raw_tokens):
 822            if token.token_type == TokenType.SEMICOLON:
 823                if i < total - 1:
 824                    chunks.append([])
 825            else:
 826                chunks[-1].append(token)
 827
 828        expressions = []
 829
 830        for tokens in chunks:
 831            self._index = -1
 832            self._tokens = tokens
 833            self._advance()
 834
 835            expressions.append(parse_method(self))
 836
 837            if self._index < len(self._tokens):
 838                self.raise_error("Invalid expression / Unexpected token")
 839
 840            self.check_errors()
 841
 842        return expressions
 843
 844    def check_errors(self) -> None:
 845        """
 846        Logs or raises any found errors, depending on the chosen error level setting.
 847        """
 848        if self.error_level == ErrorLevel.WARN:
 849            for error in self.errors:
 850                logger.error(str(error))
 851        elif self.error_level == ErrorLevel.RAISE and self.errors:
 852            raise ParseError(
 853                concat_messages(self.errors, self.max_errors),
 854                errors=merge_errors(self.errors),
 855            )
 856
 857    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 858        """
 859        Appends an error in the list of recorded errors or raises it, depending on the chosen
 860        error level setting.
 861        """
 862        token = token or self._curr or self._prev or Token.string("")
 863        start = self._find_token(token)
 864        end = start + len(token.text)
 865        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 866        highlight = self.sql[start:end]
 867        end_context = self.sql[end : end + self.error_message_context]
 868
 869        error = ParseError.new(
 870            f"{message}. Line {token.line}, Col: {token.col}.\n"
 871            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 872            description=message,
 873            line=token.line,
 874            col=token.col,
 875            start_context=start_context,
 876            highlight=highlight,
 877            end_context=end_context,
 878        )
 879
 880        if self.error_level == ErrorLevel.IMMEDIATE:
 881            raise error
 882
 883        self.errors.append(error)
 884
 885    def expression(
 886        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 887    ) -> exp.Expression:
 888        """
 889        Creates a new, validated Expression.
 890
 891        Args:
 892            exp_class: the expression class to instantiate.
 893            comments: an optional list of comments to attach to the expression.
 894            kwargs: the arguments to set for the expression along with their respective values.
 895
 896        Returns:
 897            The target expression.
 898        """
 899        instance = exp_class(**kwargs)
 900        if self._prev_comments:
 901            instance.comments = self._prev_comments
 902            self._prev_comments = None
 903        if comments:
 904            instance.comments = comments
 905        self.validate_expression(instance)
 906        return instance
 907
 908    def validate_expression(
 909        self, expression: exp.Expression, args: t.Optional[t.List] = None
 910    ) -> None:
 911        """
 912        Validates an already instantiated expression, making sure that all its mandatory arguments
 913        are set.
 914
 915        Args:
 916            expression: the expression to validate.
 917            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 918        """
 919        if self.error_level == ErrorLevel.IGNORE:
 920            return
 921
 922        for error_message in expression.error_messages(args):
 923            self.raise_error(error_message)
 924
 925    def _find_sql(self, start: Token, end: Token) -> str:
 926        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 927
 928    def _find_token(self, token: Token) -> int:
 929        line = 1
 930        col = 1
 931        index = 0
 932
 933        while line < token.line or col < token.col:
 934            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 935                line += 1
 936                col = 1
 937            else:
 938                col += 1
 939            index += 1
 940
 941        return index
 942
 943    def _advance(self, times: int = 1) -> None:
 944        self._index += times
 945        self._curr = seq_get(self._tokens, self._index)
 946        self._next = seq_get(self._tokens, self._index + 1)
 947        if self._index > 0:
 948            self._prev = self._tokens[self._index - 1]
 949            self._prev_comments = self._prev.comments
 950        else:
 951            self._prev = None
 952            self._prev_comments = None
 953
 954    def _retreat(self, index: int) -> None:
 955        if index != self._index:
 956            self._advance(index - self._index)
 957
 958    def _parse_command(self) -> exp.Expression:
 959        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 960
 961    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
 962        start = self._prev
 963        exists = self._parse_exists() if allow_exists else None
 964
 965        self._match(TokenType.ON)
 966
 967        kind = self._match_set(self.CREATABLES) and self._prev
 968
 969        if not kind:
 970            return self._parse_as_command(start)
 971
 972        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
 973            this = self._parse_user_defined_function(kind=kind.token_type)
 974        elif kind.token_type == TokenType.TABLE:
 975            this = self._parse_table()
 976        elif kind.token_type == TokenType.COLUMN:
 977            this = self._parse_column()
 978        else:
 979            this = self._parse_id_var()
 980
 981        self._match(TokenType.IS)
 982
 983        return self.expression(
 984            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
 985        )
 986
 987    def _parse_statement(self) -> t.Optional[exp.Expression]:
 988        if self._curr is None:
 989            return None
 990
 991        if self._match_set(self.STATEMENT_PARSERS):
 992            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 993
 994        if self._match_set(Tokenizer.COMMANDS):
 995            return self._parse_command()
 996
 997        expression = self._parse_expression()
 998        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 999
1000        self._parse_query_modifiers(expression)
1001        return expression
1002
1003    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
1004        start = self._prev
1005        temporary = self._match(TokenType.TEMPORARY)
1006        materialized = self._match(TokenType.MATERIALIZED)
1007        kind = self._match_set(self.CREATABLES) and self._prev.text
1008        if not kind:
1009            if default_kind:
1010                kind = default_kind
1011            else:
1012                return self._parse_as_command(start)
1013
1014        return self.expression(
1015            exp.Drop,
1016            exists=self._parse_exists(),
1017            this=self._parse_table(schema=True),
1018            kind=kind,
1019            temporary=temporary,
1020            materialized=materialized,
1021            cascade=self._match(TokenType.CASCADE),
1022        )
1023
1024    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1025        return (
1026            self._match(TokenType.IF)
1027            and (not not_ or self._match(TokenType.NOT))
1028            and self._match(TokenType.EXISTS)
1029        )
1030
1031    def _parse_create(self) -> t.Optional[exp.Expression]:
1032        start = self._prev
1033        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1034            TokenType.OR, TokenType.REPLACE
1035        )
1036        unique = self._match(TokenType.UNIQUE)
1037        volatile = self._match(TokenType.VOLATILE)
1038
1039        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1040            self._match(TokenType.TABLE)
1041
1042        properties = None
1043        create_token = self._match_set(self.CREATABLES) and self._prev
1044
1045        if not create_token:
1046            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1047            create_token = self._match_set(self.CREATABLES) and self._prev
1048
1049            if not properties or not create_token:
1050                return self._parse_as_command(start)
1051
1052        exists = self._parse_exists(not_=True)
1053        this = None
1054        expression = None
1055        indexes = None
1056        no_schema_binding = None
1057        begin = None
1058
1059        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1060            this = self._parse_user_defined_function(kind=create_token.token_type)
1061            temp_properties = self._parse_properties()
1062            if properties and temp_properties:
1063                properties.expressions.extend(temp_properties.expressions)
1064            elif temp_properties:
1065                properties = temp_properties
1066
1067            self._match(TokenType.ALIAS)
1068            begin = self._match(TokenType.BEGIN)
1069            return_ = self._match_text_seq("RETURN")
1070            expression = self._parse_statement()
1071
1072            if return_:
1073                expression = self.expression(exp.Return, this=expression)
1074        elif create_token.token_type == TokenType.INDEX:
1075            this = self._parse_index()
1076        elif create_token.token_type in self.DB_CREATABLES:
1077            table_parts = self._parse_table_parts(schema=True)
1078
1079            # exp.Properties.Location.POST_NAME
1080            if self._match(TokenType.COMMA):
1081                temp_properties = self._parse_properties(before=True)
1082                if properties and temp_properties:
1083                    properties.expressions.extend(temp_properties.expressions)
1084                elif temp_properties:
1085                    properties = temp_properties
1086
1087            this = self._parse_schema(this=table_parts)
1088
1089            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1090            temp_properties = self._parse_properties()
1091            if properties and temp_properties:
1092                properties.expressions.extend(temp_properties.expressions)
1093            elif temp_properties:
1094                properties = temp_properties
1095
1096            self._match(TokenType.ALIAS)
1097
1098            # exp.Properties.Location.POST_ALIAS
1099            if not (
1100                self._match(TokenType.SELECT, advance=False)
1101                or self._match(TokenType.WITH, advance=False)
1102                or self._match(TokenType.L_PAREN, advance=False)
1103            ):
1104                temp_properties = self._parse_properties()
1105                if properties and temp_properties:
1106                    properties.expressions.extend(temp_properties.expressions)
1107                elif temp_properties:
1108                    properties = temp_properties
1109
1110            expression = self._parse_ddl_select()
1111
1112            if create_token.token_type == TokenType.TABLE:
1113                # exp.Properties.Location.POST_EXPRESSION
1114                temp_properties = self._parse_properties()
1115                if properties and temp_properties:
1116                    properties.expressions.extend(temp_properties.expressions)
1117                elif temp_properties:
1118                    properties = temp_properties
1119
1120                indexes = []
1121                while True:
1122                    index = self._parse_create_table_index()
1123
1124                    # exp.Properties.Location.POST_INDEX
1125                    if self._match(TokenType.PARTITION_BY, advance=False):
1126                        temp_properties = self._parse_properties()
1127                        if properties and temp_properties:
1128                            properties.expressions.extend(temp_properties.expressions)
1129                        elif temp_properties:
1130                            properties = temp_properties
1131
1132                    if not index:
1133                        break
1134                    else:
1135                        indexes.append(index)
1136            elif create_token.token_type == TokenType.VIEW:
1137                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1138                    no_schema_binding = True
1139
1140        return self.expression(
1141            exp.Create,
1142            this=this,
1143            kind=create_token.text,
1144            replace=replace,
1145            unique=unique,
1146            volatile=volatile,
1147            expression=expression,
1148            exists=exists,
1149            properties=properties,
1150            indexes=indexes,
1151            no_schema_binding=no_schema_binding,
1152            begin=begin,
1153        )
1154
1155    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1156        self._match(TokenType.COMMA)
1157
1158        # parsers look to _prev for no/dual/default, so need to consume first
1159        self._match_text_seq("NO")
1160        self._match_text_seq("DUAL")
1161        self._match_text_seq("DEFAULT")
1162
1163        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1164            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1165
1166        return None
1167
1168    def _parse_property(self) -> t.Optional[exp.Expression]:
1169        if self._match_texts(self.PROPERTY_PARSERS):
1170            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1171
1172        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1173            return self._parse_character_set(default=True)
1174
1175        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1176            return self._parse_sortkey(compound=True)
1177
1178        if self._match_text_seq("SQL", "SECURITY"):
1179            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1180
1181        assignment = self._match_pair(
1182            TokenType.VAR, TokenType.EQ, advance=False
1183        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1184
1185        if assignment:
1186            key = self._parse_var_or_string()
1187            self._match(TokenType.EQ)
1188            return self.expression(exp.Property, this=key, value=self._parse_column())
1189
1190        return None
1191
1192    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1193        self._match(TokenType.EQ)
1194        self._match(TokenType.ALIAS)
1195        return self.expression(
1196            exp_class,
1197            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1198        )
1199
1200    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1201        properties = []
1202
1203        while True:
1204            if before:
1205                identified_property = self._parse_property_before()
1206            else:
1207                identified_property = self._parse_property()
1208
1209            if not identified_property:
1210                break
1211            for p in ensure_collection(identified_property):
1212                properties.append(p)
1213
1214        if properties:
1215            return self.expression(exp.Properties, expressions=properties)
1216
1217        return None
1218
1219    def _parse_fallback(self, no=False) -> exp.Expression:
1220        self._match_text_seq("FALLBACK")
1221        return self.expression(
1222            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1223        )
1224
1225    def _parse_with_property(
1226        self,
1227    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1228        self._match(TokenType.WITH)
1229        if self._match(TokenType.L_PAREN, advance=False):
1230            return self._parse_wrapped_csv(self._parse_property)
1231
1232        if self._match_text_seq("JOURNAL"):
1233            return self._parse_withjournaltable()
1234
1235        if self._match_text_seq("DATA"):
1236            return self._parse_withdata(no=False)
1237        elif self._match_text_seq("NO", "DATA"):
1238            return self._parse_withdata(no=True)
1239
1240        if not self._next:
1241            return None
1242
1243        return self._parse_withisolatedloading()
1244
1245    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1246    def _parse_definer(self) -> t.Optional[exp.Expression]:
1247        self._match(TokenType.EQ)
1248
1249        user = self._parse_id_var()
1250        self._match(TokenType.PARAMETER)
1251        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1252
1253        if not user or not host:
1254            return None
1255
1256        return exp.DefinerProperty(this=f"{user}@{host}")
1257
1258    def _parse_withjournaltable(self) -> exp.Expression:
1259        self._match(TokenType.TABLE)
1260        self._match(TokenType.EQ)
1261        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1262
1263    def _parse_log(self, no=False) -> exp.Expression:
1264        self._match_text_seq("LOG")
1265        return self.expression(exp.LogProperty, no=no)
1266
1267    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1268        before = self._match_text_seq("BEFORE")
1269        self._match_text_seq("JOURNAL")
1270        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1271
1272    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1273        self._match_text_seq("NOT")
1274        self._match_text_seq("LOCAL")
1275        self._match_text_seq("AFTER", "JOURNAL")
1276        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1277
1278    def _parse_checksum(self) -> exp.Expression:
1279        self._match_text_seq("CHECKSUM")
1280        self._match(TokenType.EQ)
1281
1282        on = None
1283        if self._match(TokenType.ON):
1284            on = True
1285        elif self._match_text_seq("OFF"):
1286            on = False
1287        default = self._match(TokenType.DEFAULT)
1288
1289        return self.expression(
1290            exp.ChecksumProperty,
1291            on=on,
1292            default=default,
1293        )
1294
1295    def _parse_freespace(self) -> exp.Expression:
1296        self._match_text_seq("FREESPACE")
1297        self._match(TokenType.EQ)
1298        return self.expression(
1299            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1300        )
1301
1302    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1303        self._match_text_seq("MERGEBLOCKRATIO")
1304        if self._match(TokenType.EQ):
1305            return self.expression(
1306                exp.MergeBlockRatioProperty,
1307                this=self._parse_number(),
1308                percent=self._match(TokenType.PERCENT),
1309            )
1310        else:
1311            return self.expression(
1312                exp.MergeBlockRatioProperty,
1313                no=no,
1314                default=default,
1315            )
1316
1317    def _parse_datablocksize(self, default=None) -> exp.Expression:
1318        if default:
1319            self._match_text_seq("DATABLOCKSIZE")
1320            return self.expression(exp.DataBlocksizeProperty, default=True)
1321        elif self._match_texts(("MIN", "MINIMUM")):
1322            self._match_text_seq("DATABLOCKSIZE")
1323            return self.expression(exp.DataBlocksizeProperty, min=True)
1324        elif self._match_texts(("MAX", "MAXIMUM")):
1325            self._match_text_seq("DATABLOCKSIZE")
1326            return self.expression(exp.DataBlocksizeProperty, min=False)
1327
1328        self._match_text_seq("DATABLOCKSIZE")
1329        self._match(TokenType.EQ)
1330        size = self._parse_number()
1331        units = None
1332        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1333            units = self._prev.text
1334        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1335
1336    def _parse_blockcompression(self) -> exp.Expression:
1337        self._match_text_seq("BLOCKCOMPRESSION")
1338        self._match(TokenType.EQ)
1339        always = self._match_text_seq("ALWAYS")
1340        manual = self._match_text_seq("MANUAL")
1341        never = self._match_text_seq("NEVER")
1342        default = self._match_text_seq("DEFAULT")
1343        autotemp = None
1344        if self._match_text_seq("AUTOTEMP"):
1345            autotemp = self._parse_schema()
1346
1347        return self.expression(
1348            exp.BlockCompressionProperty,
1349            always=always,
1350            manual=manual,
1351            never=never,
1352            default=default,
1353            autotemp=autotemp,
1354        )
1355
1356    def _parse_withisolatedloading(self) -> exp.Expression:
1357        no = self._match_text_seq("NO")
1358        concurrent = self._match_text_seq("CONCURRENT")
1359        self._match_text_seq("ISOLATED", "LOADING")
1360        for_all = self._match_text_seq("FOR", "ALL")
1361        for_insert = self._match_text_seq("FOR", "INSERT")
1362        for_none = self._match_text_seq("FOR", "NONE")
1363        return self.expression(
1364            exp.IsolatedLoadingProperty,
1365            no=no,
1366            concurrent=concurrent,
1367            for_all=for_all,
1368            for_insert=for_insert,
1369            for_none=for_none,
1370        )
1371
1372    def _parse_locking(self) -> exp.Expression:
1373        if self._match(TokenType.TABLE):
1374            kind = "TABLE"
1375        elif self._match(TokenType.VIEW):
1376            kind = "VIEW"
1377        elif self._match(TokenType.ROW):
1378            kind = "ROW"
1379        elif self._match_text_seq("DATABASE"):
1380            kind = "DATABASE"
1381        else:
1382            kind = None
1383
1384        if kind in ("DATABASE", "TABLE", "VIEW"):
1385            this = self._parse_table_parts()
1386        else:
1387            this = None
1388
1389        if self._match(TokenType.FOR):
1390            for_or_in = "FOR"
1391        elif self._match(TokenType.IN):
1392            for_or_in = "IN"
1393        else:
1394            for_or_in = None
1395
1396        if self._match_text_seq("ACCESS"):
1397            lock_type = "ACCESS"
1398        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1399            lock_type = "EXCLUSIVE"
1400        elif self._match_text_seq("SHARE"):
1401            lock_type = "SHARE"
1402        elif self._match_text_seq("READ"):
1403            lock_type = "READ"
1404        elif self._match_text_seq("WRITE"):
1405            lock_type = "WRITE"
1406        elif self._match_text_seq("CHECKSUM"):
1407            lock_type = "CHECKSUM"
1408        else:
1409            lock_type = None
1410
1411        override = self._match_text_seq("OVERRIDE")
1412
1413        return self.expression(
1414            exp.LockingProperty,
1415            this=this,
1416            kind=kind,
1417            for_or_in=for_or_in,
1418            lock_type=lock_type,
1419            override=override,
1420        )
1421
1422    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1423        if self._match(TokenType.PARTITION_BY):
1424            return self._parse_csv(self._parse_conjunction)
1425        return []
1426
1427    def _parse_partitioned_by(self) -> exp.Expression:
1428        self._match(TokenType.EQ)
1429        return self.expression(
1430            exp.PartitionedByProperty,
1431            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1432        )
1433
1434    def _parse_withdata(self, no=False) -> exp.Expression:
1435        if self._match_text_seq("AND", "STATISTICS"):
1436            statistics = True
1437        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1438            statistics = False
1439        else:
1440            statistics = None
1441
1442        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1443
1444    def _parse_noprimaryindex(self) -> exp.Expression:
1445        self._match_text_seq("PRIMARY", "INDEX")
1446        return exp.NoPrimaryIndexProperty()
1447
1448    def _parse_oncommit(self) -> exp.Expression:
1449        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1450        return exp.OnCommitProperty()
1451
1452    def _parse_distkey(self) -> exp.Expression:
1453        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1454
1455    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1456        table = self._parse_table(schema=True)
1457        options = []
1458        while self._match_texts(("INCLUDING", "EXCLUDING")):
1459            this = self._prev.text.upper()
1460            id_var = self._parse_id_var()
1461
1462            if not id_var:
1463                return None
1464
1465            options.append(
1466                self.expression(
1467                    exp.Property,
1468                    this=this,
1469                    value=exp.Var(this=id_var.this.upper()),
1470                )
1471            )
1472        return self.expression(exp.LikeProperty, this=table, expressions=options)
1473
1474    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1475        return self.expression(
1476            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1477        )
1478
1479    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1480        self._match(TokenType.EQ)
1481        return self.expression(
1482            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1483        )
1484
1485    def _parse_returns(self) -> exp.Expression:
1486        value: t.Optional[exp.Expression]
1487        is_table = self._match(TokenType.TABLE)
1488
1489        if is_table:
1490            if self._match(TokenType.LT):
1491                value = self.expression(
1492                    exp.Schema,
1493                    this="TABLE",
1494                    expressions=self._parse_csv(self._parse_struct_kwargs),
1495                )
1496                if not self._match(TokenType.GT):
1497                    self.raise_error("Expecting >")
1498            else:
1499                value = self._parse_schema(exp.Var(this="TABLE"))
1500        else:
1501            value = self._parse_types()
1502
1503        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1504
1505    def _parse_temporary(self, global_=False) -> exp.Expression:
1506        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1507        return self.expression(exp.TemporaryProperty, global_=global_)
1508
1509    def _parse_describe(self) -> exp.Expression:
1510        kind = self._match_set(self.CREATABLES) and self._prev.text
1511        this = self._parse_table()
1512
1513        return self.expression(exp.Describe, this=this, kind=kind)
1514
1515    def _parse_insert(self) -> exp.Expression:
1516        overwrite = self._match(TokenType.OVERWRITE)
1517        local = self._match(TokenType.LOCAL)
1518        alternative = None
1519
1520        if self._match_text_seq("DIRECTORY"):
1521            this: t.Optional[exp.Expression] = self.expression(
1522                exp.Directory,
1523                this=self._parse_var_or_string(),
1524                local=local,
1525                row_format=self._parse_row_format(match_row=True),
1526            )
1527        else:
1528            if self._match(TokenType.OR):
1529                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1530
1531            self._match(TokenType.INTO)
1532            self._match(TokenType.TABLE)
1533            this = self._parse_table(schema=True)
1534
1535        return self.expression(
1536            exp.Insert,
1537            this=this,
1538            exists=self._parse_exists(),
1539            partition=self._parse_partition(),
1540            expression=self._parse_ddl_select(),
1541            returning=self._parse_returning(),
1542            overwrite=overwrite,
1543            alternative=alternative,
1544        )
1545
1546    def _parse_returning(self) -> t.Optional[exp.Expression]:
1547        if not self._match(TokenType.RETURNING):
1548            return None
1549
1550        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1551
1552    def _parse_row(self) -> t.Optional[exp.Expression]:
1553        if not self._match(TokenType.FORMAT):
1554            return None
1555        return self._parse_row_format()
1556
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive-style row format clause: either SERDE <string> or
        DELIMITED with its optional terminator sub-clauses.

        Args:
            match_row: when True, require a leading ROW FORMAT pair first and
                return None if it isn't present.

        Returns:
            A RowFormatSerdeProperty, a RowFormatDelimitedProperty, or None.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each optional sub-clause is attempted in this fixed order; the order
        # of these checks mirrors the grammar and must not be rearranged.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1582
1583    def _parse_load_data(self) -> exp.Expression:
1584        local = self._match(TokenType.LOCAL)
1585        self._match_text_seq("INPATH")
1586        inpath = self._parse_string()
1587        overwrite = self._match(TokenType.OVERWRITE)
1588        self._match_pair(TokenType.INTO, TokenType.TABLE)
1589
1590        return self.expression(
1591            exp.LoadData,
1592            this=self._parse_table(schema=True),
1593            local=local,
1594            overwrite=overwrite,
1595            inpath=inpath,
1596            partition=self._parse_partition(),
1597            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1598            serde=self._match_text_seq("SERDE") and self._parse_string(),
1599        )
1600
1601    def _parse_delete(self) -> exp.Expression:
1602        self._match(TokenType.FROM)
1603
1604        return self.expression(
1605            exp.Delete,
1606            this=self._parse_table(schema=True),
1607            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1608            where=self._parse_where(),
1609            returning=self._parse_returning(),
1610        )
1611
1612    def _parse_update(self) -> exp.Expression:
1613        return self.expression(
1614            exp.Update,
1615            **{  # type: ignore
1616                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1617                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1618                "from": self._parse_from(),
1619                "where": self._parse_where(),
1620                "returning": self._parse_returning(),
1621            },
1622        )
1623
1624    def _parse_uncache(self) -> exp.Expression:
1625        if not self._match(TokenType.TABLE):
1626            self.raise_error("Expecting TABLE after UNCACHE")
1627
1628        return self.expression(
1629            exp.Uncache,
1630            exists=self._parse_exists(),
1631            this=self._parse_table(schema=True),
1632        )
1633
1634    def _parse_cache(self) -> exp.Expression:
1635        lazy = self._match(TokenType.LAZY)
1636        self._match(TokenType.TABLE)
1637        table = self._parse_table(schema=True)
1638        options = []
1639
1640        if self._match(TokenType.OPTIONS):
1641            self._match_l_paren()
1642            k = self._parse_string()
1643            self._match(TokenType.EQ)
1644            v = self._parse_string()
1645            options = [k, v]
1646            self._match_r_paren()
1647
1648        self._match(TokenType.ALIAS)
1649        return self.expression(
1650            exp.Cache,
1651            this=table,
1652            lazy=lazy,
1653            options=options,
1654            expression=self._parse_select(nested=True),
1655        )
1656
1657    def _parse_partition(self) -> t.Optional[exp.Expression]:
1658        if not self._match(TokenType.PARTITION):
1659            return None
1660
1661        return self.expression(
1662            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1663        )
1664
1665    def _parse_value(self) -> exp.Expression:
1666        if self._match(TokenType.L_PAREN):
1667            expressions = self._parse_csv(self._parse_conjunction)
1668            self._match_r_paren()
1669            return self.expression(exp.Tuple, expressions=expressions)
1670
1671        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1672        # Source: https://prestodb.io/docs/current/sql/values.html
1673        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1674
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT, parenthesized
        subquery (when ``nested``/``table`` allow it), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select at this position.
            table: allow (and prefer) a table expression inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression, possibly wrapped in set operations, or None
            if no select-like construct is found.
        """
        # A WITH clause may precede any statement that supports it.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            # Capture comments attached to the SELECT token before further parsing
            # advances the token stream.
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                # DISTINCT may carry an ON (...) list (e.g. DISTINCT ON (col)).
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1748
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        Args:
            skip_with_token: when True, assume WITH was already consumed.

        Returns:
            An exp.With node, or None when no WITH clause is present.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally separated by commas, but a repeated WITH is also
            # accepted as a separator; when a comma matched, the short-circuit
            # above leaves a possible following WITH unconsumed, so it is
            # consumed in the else branch.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1765
1766    def _parse_cte(self) -> exp.Expression:
1767        alias = self._parse_table_alias()
1768        if not alias or not alias.this:
1769            self.raise_error("Expected CTE to have alias")
1770
1771        self._match(TokenType.ALIAS)
1772
1773        return self.expression(
1774            exp.CTE,
1775            this=self._parse_wrapped(self._parse_statement),
1776            alias=alias,
1777        )
1778
1779    def _parse_table_alias(
1780        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1781    ) -> t.Optional[exp.Expression]:
1782        any_token = self._match(TokenType.ALIAS)
1783        alias = self._parse_id_var(
1784            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1785        )
1786        index = self._index
1787
1788        if self._match(TokenType.L_PAREN):
1789            columns = self._parse_csv(self._parse_function_parameter)
1790            self._match_r_paren() if columns else self._retreat(index)
1791        else:
1792            columns = None
1793
1794        if not alias and not columns:
1795            return None
1796
1797        return self.expression(exp.TableAlias, this=alias, columns=columns)
1798
1799    def _parse_subquery(
1800        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1801    ) -> exp.Expression:
1802        return self.expression(
1803            exp.Subquery,
1804            this=this,
1805            pivots=self._parse_pivots(),
1806            alias=self._parse_table_alias() if parse_alias else None,
1807        )
1808
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, WHERE/GROUP/ORDER/... clauses)
        to ``this`` in place.

        No-op unless ``this`` is one of the MODIFIABLES node types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        # Comma-separated FROM items are only parsed for non-Table nodes.
        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # A comma extends the FROM clause with another table source.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # Remaining clause parsers (WHERE, GROUP BY, ORDER BY, LIMIT, ...) are
        # table-driven; each sets its result on the node when it matched.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1833
1834    def _parse_hint(self) -> t.Optional[exp.Expression]:
1835        if self._match(TokenType.HINT):
1836            hints = self._parse_csv(self._parse_function)
1837            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1838                self.raise_error("Expected */ after HINT")
1839            return self.expression(exp.Hint, expressions=hints)
1840
1841        return None
1842
1843    def _parse_into(self) -> t.Optional[exp.Expression]:
1844        if not self._match(TokenType.INTO):
1845            return None
1846
1847        temp = self._match(TokenType.TEMPORARY)
1848        unlogged = self._match(TokenType.UNLOGGED)
1849        self._match(TokenType.TABLE)
1850
1851        return self.expression(
1852            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1853        )
1854
1855    def _parse_from(self) -> t.Optional[exp.Expression]:
1856        if not self._match(TokenType.FROM):
1857            return None
1858
1859        return self.expression(
1860            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1861        )
1862
1863    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1864        if not self._match(TokenType.MATCH_RECOGNIZE):
1865            return None
1866        self._match_l_paren()
1867
1868        partition = self._parse_partition_by()
1869        order = self._parse_order()
1870        measures = (
1871            self._parse_alias(self._parse_conjunction())
1872            if self._match_text_seq("MEASURES")
1873            else None
1874        )
1875
1876        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1877            rows = exp.Var(this="ONE ROW PER MATCH")
1878        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1879            text = "ALL ROWS PER MATCH"
1880            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1881                text += f" SHOW EMPTY MATCHES"
1882            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1883                text += f" OMIT EMPTY MATCHES"
1884            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1885                text += f" WITH UNMATCHED ROWS"
1886            rows = exp.Var(this=text)
1887        else:
1888            rows = None
1889
1890        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1891            text = "AFTER MATCH SKIP"
1892            if self._match_text_seq("PAST", "LAST", "ROW"):
1893                text += f" PAST LAST ROW"
1894            elif self._match_text_seq("TO", "NEXT", "ROW"):
1895                text += f" TO NEXT ROW"
1896            elif self._match_text_seq("TO", "FIRST"):
1897                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1898            elif self._match_text_seq("TO", "LAST"):
1899                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1900            after = exp.Var(this=text)
1901        else:
1902            after = None
1903
1904        if self._match_text_seq("PATTERN"):
1905            self._match_l_paren()
1906
1907            if not self._curr:
1908                self.raise_error("Expecting )", self._curr)
1909
1910            paren = 1
1911            start = self._curr
1912
1913            while self._curr and paren > 0:
1914                if self._curr.token_type == TokenType.L_PAREN:
1915                    paren += 1
1916                if self._curr.token_type == TokenType.R_PAREN:
1917                    paren -= 1
1918                end = self._prev
1919                self._advance()
1920            if paren > 0:
1921                self.raise_error("Expecting )", self._curr)
1922            pattern = exp.Var(this=self._find_sql(start, end))
1923        else:
1924            pattern = None
1925
1926        define = (
1927            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1928        )
1929        self._match_r_paren()
1930
1931        return self.expression(
1932            exp.MatchRecognize,
1933            partition_by=partition,
1934            order=order,
1935            measures=measures,
1936            rows=rows,
1937            after=after,
1938            pattern=pattern,
1939            define=define,
1940        )
1941
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs.

        Returns:
            An exp.Lateral node, an exp.Join wrapping it (for APPLY forms),
            or None when none of the introducing keywords are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY behaves like a left lateral join.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW (Hive-style): alias is `table AS col1, col2, ...`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY forms are represented as joins: CROSS APPLY has no side,
        # OUTER APPLY becomes a LEFT join.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1986
1987    def _parse_join_side_and_kind(
1988        self,
1989    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1990        return (
1991            self._match(TokenType.NATURAL) and self._prev,
1992            self._match_set(self.JOIN_SIDES) and self._prev,
1993            self._match_set(self.JOIN_KINDS) and self._prev,
1994        )
1995
1996    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
1997        natural, side, kind = self._parse_join_side_and_kind()
1998
1999        if not skip_join_token and not self._match(TokenType.JOIN):
2000            return None
2001
2002        kwargs: t.Dict[
2003            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
2004        ] = {"this": self._parse_table()}
2005
2006        if natural:
2007            kwargs["natural"] = True
2008        if side:
2009            kwargs["side"] = side.text
2010        if kind:
2011            kwargs["kind"] = kind.text
2012
2013        if self._match(TokenType.ON):
2014            kwargs["on"] = self._parse_conjunction()
2015        elif self._match(TokenType.USING):
2016            kwargs["using"] = self._parse_wrapped_id_vars()
2017
2018        return self.expression(exp.Join, **kwargs)  # type: ignore
2019
2020    def _parse_index(self) -> exp.Expression:
2021        index = self._parse_id_var()
2022        self._match(TokenType.ON)
2023        self._match(TokenType.TABLE)  # hive
2024
2025        return self.expression(
2026            exp.Index,
2027            this=index,
2028            table=self.expression(exp.Table, this=self._parse_id_var()),
2029            columns=self._parse_expression(),
2030        )
2031
2032    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2033        unique = self._match(TokenType.UNIQUE)
2034        primary = self._match_text_seq("PRIMARY")
2035        amp = self._match_text_seq("AMP")
2036        if not self._match(TokenType.INDEX):
2037            return None
2038        index = self._parse_id_var()
2039        columns = None
2040        if self._match(TokenType.L_PAREN, advance=False):
2041            columns = self._parse_wrapped_csv(self._parse_column)
2042        return self.expression(
2043            exp.Index,
2044            this=index,
2045            columns=columns,
2046            unique=unique,
2047            primary=primary,
2048            amp=amp,
2049        )
2050
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly-qualified table name: ``[catalog.][db.]table[.more...]``.

        Args:
            schema: when True, do not try to parse the first part as a function.

        Returns:
            An exp.Table node; raises via raise_error when no table name is found.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        # Each DOT shifts the parts left: table -> db -> catalog. Once the
        # catalog slot is filled, further parts nest as Dot expressions.
        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2071
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table source: lateral, unnest, VALUES, subquery, or a named table
        with optional alias, pivots, hints and TABLESAMPLE.

        Args:
            schema: parse the table as a schema target (e.g. for DDL/INSERT).
            alias_tokens: token types allowed as the table alias.
        """
        # The alternatives below are tried in order; the first that matches wins.
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether TABLESAMPLE comes before or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (...) after a table name holds table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this
2126
2127    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2128        if not self._match(TokenType.UNNEST):
2129            return None
2130
2131        expressions = self._parse_wrapped_csv(self._parse_column)
2132        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2133        alias = self._parse_table_alias()
2134
2135        if alias and self.unnest_column_only:
2136            if alias.args.get("columns"):
2137                self.raise_error("Unexpected extra column alias in unnest.")
2138            alias.set("columns", [alias.this])
2139            alias.set("this", None)
2140
2141        offset = None
2142        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2143            self._match(TokenType.ALIAS)
2144            offset = self._parse_conjunction()
2145
2146        return self.expression(
2147            exp.Unnest,
2148            expressions=expressions,
2149            ordinality=ordinality,
2150            alias=alias,
2151            offset=offset,
2152        )
2153
2154    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2155        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2156        if not is_derived and not self._match(TokenType.VALUES):
2157            return None
2158
2159        expressions = self._parse_csv(self._parse_value)
2160
2161        if is_derived:
2162            self._match_r_paren()
2163
2164        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2165
2166    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2167        if not self._match(TokenType.TABLE_SAMPLE) and not (
2168            as_modifier and self._match_text_seq("USING", "SAMPLE")
2169        ):
2170            return None
2171
2172        bucket_numerator = None
2173        bucket_denominator = None
2174        bucket_field = None
2175        percent = None
2176        rows = None
2177        size = None
2178        seed = None
2179
2180        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2181        method = self._parse_var(tokens=(TokenType.ROW,))
2182
2183        self._match(TokenType.L_PAREN)
2184
2185        num = self._parse_number()
2186
2187        if self._match(TokenType.BUCKET):
2188            bucket_numerator = self._parse_number()
2189            self._match(TokenType.OUT_OF)
2190            bucket_denominator = bucket_denominator = self._parse_number()
2191            self._match(TokenType.ON)
2192            bucket_field = self._parse_field()
2193        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2194            percent = num
2195        elif self._match(TokenType.ROWS):
2196            rows = num
2197        else:
2198            size = num
2199
2200        self._match(TokenType.R_PAREN)
2201
2202        if self._match(TokenType.L_PAREN):
2203            method = self._parse_var()
2204            seed = self._match(TokenType.COMMA) and self._parse_number()
2205            self._match_r_paren()
2206        elif self._match_texts(("SEED", "REPEATABLE")):
2207            seed = self._parse_wrapped(self._parse_number)
2208
2209        return self.expression(
2210            exp.TableSample,
2211            method=method,
2212            bucket_numerator=bucket_numerator,
2213            bucket_denominator=bucket_denominator,
2214            bucket_field=bucket_field,
2215            percent=percent,
2216            rows=rows,
2217            size=size,
2218            seed=seed,
2219            kind=kind,
2220        )
2221
2222    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2223        return list(iter(self._parse_pivot, None))
2224
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause.

        Backtracks (via _retreat) when PIVOT/UNPIVOT is not followed by an
        opening paren, so the keyword can be re-interpreted by the caller.

        Returns:
            An exp.Pivot node, or None when no pivot clause is present.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause: rewind past the consumed keyword.
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists aggregate calls with aliases.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2265
2266    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2267        if not skip_where_token and not self._match(TokenType.WHERE):
2268            return None
2269
2270        return self.expression(
2271            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2272        )
2273
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP and CUBE.

        Args:
            skip_group_by_token: when True, assume GROUP BY was already consumed.

        Returns:
            An exp.Group node, or None when no GROUP BY is present.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulate each kind of grouping element across loop iterations.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # `WITH ROLLUP` / `WITH CUBE` store the truthy WITH-match result
            # itself; the parenthesized forms store the parsed column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once a pass matches nothing new.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2305
2306    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2307        if not self._match(TokenType.GROUPING_SETS):
2308            return None
2309
2310        return self._parse_wrapped_csv(self._parse_grouping_set)
2311
2312    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2313        if self._match(TokenType.L_PAREN):
2314            grouping_set = self._parse_csv(self._parse_column)
2315            self._match_r_paren()
2316            return self.expression(exp.Tuple, expressions=grouping_set)
2317
2318        return self._parse_column()
2319
2320    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2321        if not skip_having_token and not self._match(TokenType.HAVING):
2322            return None
2323        return self.expression(exp.Having, this=self._parse_conjunction())
2324
2325    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2326        if not self._match(TokenType.QUALIFY):
2327            return None
2328        return self.expression(exp.Qualify, this=self._parse_conjunction())
2329
2330    def _parse_order(
2331        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2332    ) -> t.Optional[exp.Expression]:
2333        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2334            return this
2335
2336        return self.expression(
2337            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2338        )
2339
2340    def _parse_sort(
2341        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2342    ) -> t.Optional[exp.Expression]:
2343        if not self._match(token_type):
2344            return None
2345        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2346
2347    def _parse_ordered(self) -> exp.Expression:
2348        this = self._parse_conjunction()
2349        self._match(TokenType.ASC)
2350        is_desc = self._match(TokenType.DESC)
2351        is_nulls_first = self._match(TokenType.NULLS_FIRST)
2352        is_nulls_last = self._match(TokenType.NULLS_LAST)
2353        desc = is_desc or False
2354        asc = not desc
2355        nulls_first = is_nulls_first or False
2356        explicitly_null_ordered = is_nulls_first or is_nulls_last
2357        if (
2358            not explicitly_null_ordered
2359            and (
2360                (asc and self.null_ordering == "nulls_are_small")
2361                or (desc and self.null_ordering != "nulls_are_small")
2362            )
2363            and self.null_ordering != "nulls_are_last"
2364        ):
2365            nulls_first = True
2366
2367        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2368
2369    def _parse_limit(
2370        self, this: t.Optional[exp.Expression] = None, top: bool = False
2371    ) -> t.Optional[exp.Expression]:
2372        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2373            limit_paren = self._match(TokenType.L_PAREN)
2374            limit_exp = self.expression(
2375                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2376            )
2377
2378            if limit_paren:
2379                self._match_r_paren()
2380
2381            return limit_exp
2382
2383        if self._match(TokenType.FETCH):
2384            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2385            direction = self._prev.text if direction else "FIRST"
2386            count = self._parse_number()
2387            self._match_set((TokenType.ROW, TokenType.ROWS))
2388            self._match(TokenType.ONLY)
2389            return self.expression(exp.Fetch, direction=direction, count=count)
2390
2391        return this
2392
2393    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2394        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2395            return this
2396
2397        count = self._parse_number()
2398        self._match_set((TokenType.ROW, TokenType.ROWS))
2399        return self.expression(exp.Offset, this=this, expression=count)
2400
2401    def _parse_lock(self) -> t.Optional[exp.Expression]:
2402        if self._match_text_seq("FOR", "UPDATE"):
2403            return self.expression(exp.Lock, update=True)
2404        if self._match_text_seq("FOR", "SHARE"):
2405            return self.expression(exp.Lock, update=False)
2406
2407        return None
2408
2409    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2410        if not self._match_set(self.SET_OPERATIONS):
2411            return this
2412
2413        token_type = self._prev.token_type
2414
2415        if token_type == TokenType.UNION:
2416            expression = exp.Union
2417        elif token_type == TokenType.EXCEPT:
2418            expression = exp.Except
2419        else:
2420            expression = exp.Intersect
2421
2422        return self.expression(
2423            expression,
2424            this=this,
2425            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2426            expression=self._parse_set_operations(self._parse_select(nested=True)),
2427        )
2428
2429    def _parse_expression(self) -> t.Optional[exp.Expression]:
2430        return self._parse_alias(self._parse_conjunction())
2431
2432    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2433        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2434
2435    def _parse_equality(self) -> t.Optional[exp.Expression]:
2436        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2437
2438    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2439        return self._parse_tokens(self._parse_range, self.COMPARISON)
2440
2441    def _parse_range(self) -> t.Optional[exp.Expression]:
2442        this = self._parse_bitwise()
2443        negate = self._match(TokenType.NOT)
2444
2445        if self._match_set(self.RANGE_PARSERS):
2446            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
2447        elif self._match(TokenType.ISNULL):
2448            this = self.expression(exp.Is, this=this, expression=exp.Null())
2449
2450        # Postgres supports ISNULL and NOTNULL for conditions.
2451        # https://blog.andreiavram.ro/postgresql-null-composite-type/
2452        if self._match(TokenType.NOTNULL):
2453            this = self.expression(exp.Is, this=this, expression=exp.Null())
2454            this = self.expression(exp.Not, this=this)
2455
2456        if negate:
2457            this = self.expression(exp.Not, this=this)
2458
2459        if self._match(TokenType.IS):
2460            this = self._parse_is(this)
2461
2462        return this
2463
2464    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2465        negate = self._match(TokenType.NOT)
2466        if self._match(TokenType.DISTINCT_FROM):
2467            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2468            return self.expression(klass, this=this, expression=self._parse_expression())
2469
2470        this = self.expression(
2471            exp.Is,
2472            this=this,
2473            expression=self._parse_null() or self._parse_boolean(),
2474        )
2475        return self.expression(exp.Not, this=this) if negate else this
2476
2477    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2478        unnest = self._parse_unnest()
2479        if unnest:
2480            this = self.expression(exp.In, this=this, unnest=unnest)
2481        elif self._match(TokenType.L_PAREN):
2482            expressions = self._parse_csv(self._parse_select_or_expression)
2483
2484            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2485                this = self.expression(exp.In, this=this, query=expressions[0])
2486            else:
2487                this = self.expression(exp.In, this=this, expressions=expressions)
2488
2489            self._match_r_paren()
2490        else:
2491            this = self.expression(exp.In, this=this, field=self._parse_field())
2492
2493        return this
2494
2495    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2496        low = self._parse_bitwise()
2497        self._match(TokenType.AND)
2498        high = self._parse_bitwise()
2499        return self.expression(exp.Between, this=this, low=low, high=high)
2500
2501    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2502        if not self._match(TokenType.ESCAPE):
2503            return this
2504        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2505
2506    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2507        this = self._parse_term()
2508
2509        while True:
2510            if self._match_set(self.BITWISE):
2511                this = self.expression(
2512                    self.BITWISE[self._prev.token_type],
2513                    this=this,
2514                    expression=self._parse_term(),
2515                )
2516            elif self._match_pair(TokenType.LT, TokenType.LT):
2517                this = self.expression(
2518                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2519                )
2520            elif self._match_pair(TokenType.GT, TokenType.GT):
2521                this = self.expression(
2522                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2523                )
2524            else:
2525                break
2526
2527        return this
2528
2529    def _parse_term(self) -> t.Optional[exp.Expression]:
2530        return self._parse_tokens(self._parse_factor, self.TERM)
2531
2532    def _parse_factor(self) -> t.Optional[exp.Expression]:
2533        return self._parse_tokens(self._parse_unary, self.FACTOR)
2534
2535    def _parse_unary(self) -> t.Optional[exp.Expression]:
2536        if self._match_set(self.UNARY_PARSERS):
2537            return self.UNARY_PARSERS[self._prev.token_type](self)
2538        return self._parse_at_time_zone(self._parse_type())
2539
2540    def _parse_type(self) -> t.Optional[exp.Expression]:
2541        if self._match(TokenType.INTERVAL):
2542            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())
2543
2544        index = self._index
2545        type_token = self._parse_types(check_func=True)
2546        this = self._parse_column()
2547
2548        if type_token:
2549            if isinstance(this, exp.Literal):
2550                return self.expression(exp.Cast, this=this, to=type_token)
2551            if not type_token.args.get("expressions"):
2552                self._retreat(index)
2553                return self._parse_column()
2554            return type_token
2555
2556        return this
2557
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type at the cursor, rewinding and returning None on failure.

        Handles parameterized types (``DECIMAL(10, 2)``), nested types using
        either parens or angle brackets (``ARRAY<INT>``, ``STRUCT(a INT)``),
        bracket array suffixes (``INT[]``), timestamp/time-zone variants, and
        INTERVAL types.

        Args:
            check_func: when True, a parenthesized type that could also be a
                function call is only accepted if a string literal follows;
                otherwise the cursor is rewound and None is returned so the
                tokens can be re-parsed as a function.
        """
        index = self._index

        # Optional schema prefix before the type name (matches "SYSUDTLIB.",
        # presumably for Teradata UDTs — confirmed only by the literal match here).
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type arguments, e.g. DECIMAL(10, 2) or STRUCT(a INT).
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty '()' is not a valid type argument list; rewind entirely.
                self._retreat(index)
                return None

            self._match_r_paren()
            # With parens, this might equally have been a function call.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Bracket array suffixes: INT[] / INT[][] / ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' (no matching ']') means this was not a type after all.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types: ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize TIME/TIMESTAMP and their WITH/WITHOUT TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat the tokens as a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2671
2672    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2673        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2674            return self._parse_types()
2675
2676        this = self._parse_id_var()
2677        self._match(TokenType.COLON)
2678        data_type = self._parse_types()
2679
2680        if not data_type:
2681            return None
2682        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2683
2684    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2685        if not self._match(TokenType.AT_TIME_ZONE):
2686            return this
2687        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2688
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, folding in chains of column operators.

        Handles dotted qualification (``a.b.c``), the ``::`` cast operator,
        other COLUMN_OPERATORS (which take a literal on the right side), and
        bracket subscripts, producing Column / Dot / operator nodes.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # '::' cast — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other mapped operators consume the next token as a literal
                # (numeric tokens become number literals, all else strings).
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dot shifts the qualifiers one slot: the old column name
                # becomes the table, table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2737
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, or a parenthesized expression,
        tuple, or subquery.

        Adjacent string literals are folded into a single Concat node, and a
        leading ``.N`` is read as the decimal literal ``0.N``.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> Concat('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            # Preserve comments attached to '(' so they survive the rewrap below.
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) is a tuple, not a parenthesized expression.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                this.comments = comments

            return this

        return None
2784
2785    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2786        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2787
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call starting at the cursor, if one is there.

        Args:
            functions: optional name -> builder mapping overriding self.FUNCTIONS.

        Returns:
            The parsed function expression (possibly wrapped by a trailing
            window spec), or None if the tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Functions parsed without a parenthesized argument list.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening '('

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) — parse the subquery predicate directly.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved verbatim as Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2849
2850    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
2851        return self._parse_column_def(self._parse_id_var())
2852
2853    def _parse_user_defined_function(
2854        self, kind: t.Optional[TokenType] = None
2855    ) -> t.Optional[exp.Expression]:
2856        this = self._parse_id_var()
2857
2858        while self._match(TokenType.DOT):
2859            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2860
2861        if not self._match(TokenType.L_PAREN):
2862            return this
2863
2864        expressions = self._parse_csv(self._parse_function_parameter)
2865        self._match_r_paren()
2866        return self.expression(
2867            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2868        )
2869
2870    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2871        literal = self._parse_primary()
2872        if literal:
2873            return self.expression(exp.Introducer, this=token.text, expression=literal)
2874
2875        return self.expression(exp.Identifier, this=token.text)
2876
2877    def _parse_national(self, token: Token) -> exp.Expression:
2878        return self.expression(exp.National, this=exp.Literal.string(token.text))
2879
2880    def _parse_session_parameter(self) -> exp.Expression:
2881        kind = None
2882        this = self._parse_id_var() or self._parse_primary()
2883
2884        if this and self._match(TokenType.DOT):
2885            kind = this.name
2886            this = self._parse_var() or self._parse_primary()
2887
2888        return self.expression(exp.SessionParameter, this=this, kind=kind)
2889
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument, falling back to an ordinary expression.

        First tries to read a lambda parameter list followed by one of the
        LAMBDAS operators; if that fails the cursor is rewound and the tokens
        are re-parsed as a (possibly DISTINCT) expression with optional
        IGNORE/RESPECT NULLS, ORDER and LIMIT modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator followed: rewind and parse a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default behavior, so it is consumed silently.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2921
2922    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2923        index = self._index
2924        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2925            self._retreat(index)
2926            return this
2927
2928        args = self._parse_csv(
2929            lambda: self._parse_constraint()
2930            or self._parse_column_def(self._parse_field(any_token=True))
2931        )
2932        self._match_r_paren()
2933        return self.expression(exp.Schema, this=this, expressions=args)
2934
2935    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2936        kind = self._parse_types()
2937
2938        if self._match_text_seq("FOR", "ORDINALITY"):
2939            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2940
2941        constraints = []
2942        while True:
2943            constraint = self._parse_column_constraint()
2944            if not constraint:
2945                break
2946            constraints.append(constraint)
2947
2948        if not kind and not constraints:
2949            return this
2950
2951        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2952
2953    def _parse_auto_increment(self) -> exp.Expression:
2954        start = None
2955        increment = None
2956
2957        if self._match(TokenType.L_PAREN, advance=False):
2958            args = self._parse_wrapped_csv(self._parse_bitwise)
2959            start = seq_get(args, 0)
2960            increment = seq_get(args, 1)
2961        elif self._match_text_seq("START"):
2962            start = self._parse_bitwise()
2963            self._match_text_seq("INCREMENT")
2964            increment = self._parse_bitwise()
2965
2966        if start and increment:
2967            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2968
2969        return exp.AutoIncrementColumnConstraint()
2970
2971    def _parse_compress(self) -> exp.Expression:
2972        if self._match(TokenType.L_PAREN, advance=False):
2973            return self.expression(
2974                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2975            )
2976
2977        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2978
2979    def _parse_generated_as_identity(self) -> exp.Expression:
2980        if self._match(TokenType.BY_DEFAULT):
2981            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
2982        else:
2983            self._match_text_seq("ALWAYS")
2984            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
2985
2986        self._match_text_seq("AS", "IDENTITY")
2987        if self._match(TokenType.L_PAREN):
2988            if self._match_text_seq("START", "WITH"):
2989                this.set("start", self._parse_bitwise())
2990            if self._match_text_seq("INCREMENT", "BY"):
2991                this.set("increment", self._parse_bitwise())
2992            if self._match_text_seq("MINVALUE"):
2993                this.set("minvalue", self._parse_bitwise())
2994            if self._match_text_seq("MAXVALUE"):
2995                this.set("maxvalue", self._parse_bitwise())
2996
2997            if self._match_text_seq("CYCLE"):
2998                this.set("cycle", True)
2999            elif self._match_text_seq("NO", "CYCLE"):
3000                this.set("cycle", False)
3001
3002            self._match_r_paren()
3003
3004        return this
3005
3006    def _parse_inline(self) -> t.Optional[exp.Expression]:
3007        self._match_text_seq("LENGTH")
3008        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3009
3010    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3011        if self._match_text_seq("NULL"):
3012            return self.expression(exp.NotNullColumnConstraint)
3013        if self._match_text_seq("CASESPECIFIC"):
3014            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3015        return None
3016
3017    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3018        this = self._parse_references()
3019        if this:
3020            return this
3021
3022        if self._match(TokenType.CONSTRAINT):
3023            this = self._parse_id_var()
3024
3025        if self._match_texts(self.CONSTRAINT_PARSERS):
3026            return self.expression(
3027                exp.ColumnConstraint,
3028                this=this,
3029                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3030            )
3031
3032        return this
3033
3034    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3035        if not self._match(TokenType.CONSTRAINT):
3036            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3037
3038        this = self._parse_id_var()
3039        expressions = []
3040
3041        while True:
3042            constraint = self._parse_unnamed_constraint() or self._parse_function()
3043            if not constraint:
3044                break
3045            expressions.append(constraint)
3046
3047        return self.expression(exp.Constraint, this=this, expressions=expressions)
3048
3049    def _parse_unnamed_constraint(
3050        self, constraints: t.Optional[t.Collection[str]] = None
3051    ) -> t.Optional[exp.Expression]:
3052        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3053            return None
3054
3055        constraint = self._prev.text.upper()
3056        if constraint not in self.CONSTRAINT_PARSERS:
3057            self.raise_error(f"No parser found for schema constraint {constraint}.")
3058
3059        return self.CONSTRAINT_PARSERS[constraint](self)
3060
3061    def _parse_unique(self) -> exp.Expression:
3062        if not self._match(TokenType.L_PAREN, advance=False):
3063            return self.expression(exp.UniqueColumnConstraint)
3064        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3065
3066    def _parse_key_constraint_options(self) -> t.List[str]:
3067        options = []
3068        while True:
3069            if not self._curr:
3070                break
3071
3072            if self._match(TokenType.ON):
3073                action = None
3074                on = self._advance_any() and self._prev.text
3075
3076                if self._match(TokenType.NO_ACTION):
3077                    action = "NO ACTION"
3078                elif self._match(TokenType.CASCADE):
3079                    action = "CASCADE"
3080                elif self._match_pair(TokenType.SET, TokenType.NULL):
3081                    action = "SET NULL"
3082                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3083                    action = "SET DEFAULT"
3084                else:
3085                    self.raise_error("Invalid key constraint")
3086
3087                options.append(f"ON {on} {action}")
3088            elif self._match_text_seq("NOT", "ENFORCED"):
3089                options.append("NOT ENFORCED")
3090            elif self._match_text_seq("DEFERRABLE"):
3091                options.append("DEFERRABLE")
3092            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3093                options.append("INITIALLY DEFERRED")
3094            elif self._match_text_seq("NORELY"):
3095                options.append("NORELY")
3096            elif self._match_text_seq("MATCH", "FULL"):
3097                options.append("MATCH FULL")
3098            else:
3099                break
3100
3101        return options
3102
3103    def _parse_references(self) -> t.Optional[exp.Expression]:
3104        if not self._match(TokenType.REFERENCES):
3105            return None
3106
3107        expressions = None
3108        this = self._parse_id_var()
3109
3110        if self._match(TokenType.L_PAREN, advance=False):
3111            expressions = self._parse_wrapped_id_vars()
3112
3113        options = self._parse_key_constraint_options()
3114        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3115
3116    def _parse_foreign_key(self) -> exp.Expression:
3117        expressions = self._parse_wrapped_id_vars()
3118        reference = self._parse_references()
3119        options = {}
3120
3121        while self._match(TokenType.ON):
3122            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3123                self.raise_error("Expected DELETE or UPDATE")
3124
3125            kind = self._prev.text.lower()
3126
3127            if self._match(TokenType.NO_ACTION):
3128                action = "NO ACTION"
3129            elif self._match(TokenType.SET):
3130                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3131                action = "SET " + self._prev.text.upper()
3132            else:
3133                self._advance()
3134                action = self._prev.text.upper()
3135
3136            options[kind] = action
3137
3138        return self.expression(
3139            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3140        )
3141
3142    def _parse_primary_key(self) -> exp.Expression:
3143        desc = (
3144            self._match_set((TokenType.ASC, TokenType.DESC))
3145            and self._prev.token_type == TokenType.DESC
3146        )
3147
3148        if not self._match(TokenType.L_PAREN, advance=False):
3149            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3150
3151        expressions = self._parse_wrapped_id_vars()
3152        options = self._parse_key_constraint_options()
3153        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3154
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array/struct literals or subscript access.

        `{...}` builds an exp.Struct, `[...]` with no base (or after ARRAY)
        builds an exp.Array, and `[...]` after any other expression builds an
        exp.Bracket subscript. Recurses so chained brackets like x[1][2] are
        consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift indices by the dialect's array index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to consume any chained bracket that follows.
        return self._parse_bracket(this)
3183
3184    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3185        if self._match(TokenType.COLON):
3186            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3187        return this
3188
3189    def _parse_case(self) -> t.Optional[exp.Expression]:
3190        ifs = []
3191        default = None
3192
3193        expression = self._parse_conjunction()
3194
3195        while self._match(TokenType.WHEN):
3196            this = self._parse_conjunction()
3197            self._match(TokenType.THEN)
3198            then = self._parse_conjunction()
3199            ifs.append(self.expression(exp.If, this=this, true=then))
3200
3201        if self._match(TokenType.ELSE):
3202            default = self._parse_conjunction()
3203
3204        if not self._match(TokenType.END):
3205            self.raise_error("Expected END after CASE", self._prev)
3206
3207        return self._parse_window(
3208            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3209        )
3210
3211    def _parse_if(self) -> t.Optional[exp.Expression]:
3212        if self._match(TokenType.L_PAREN):
3213            args = self._parse_csv(self._parse_conjunction)
3214            this = exp.If.from_arg_list(args)
3215            self.validate_expression(this, args)
3216            self._match_r_paren()
3217        else:
3218            condition = self._parse_conjunction()
3219            self._match(TokenType.THEN)
3220            true = self._parse_conjunction()
3221            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3222            self._match(TokenType.END)
3223            this = self.expression(exp.If, this=condition, true=true, false=false)
3224
3225        return self._parse_window(this)
3226
3227    def _parse_extract(self) -> exp.Expression:
3228        this = self._parse_function() or self._parse_var() or self._parse_type()
3229
3230        if self._match(TokenType.FROM):
3231            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3232
3233        if not self._match(TokenType.COMMA):
3234            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3235
3236        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3237
3238    def _parse_cast(self, strict: bool) -> exp.Expression:
3239        this = self._parse_conjunction()
3240
3241        if not self._match(TokenType.ALIAS):
3242            self.raise_error("Expected AS after CAST")
3243
3244        to = self._parse_types()
3245
3246        if not to:
3247            self.raise_error("Expected TYPE after CAST")
3248        elif to.this == exp.DataType.Type.CHAR:
3249            if self._match(TokenType.CHARACTER_SET):
3250                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3251
3252        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3253
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into an exp.GroupConcat.

        Handles the Postgres form with an inline ORDER BY, the plain
        MySQL/SQLite form, and the WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Remember the position so we can rewind if WITHIN GROUP doesn't follow.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3282
3283    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3284        to: t.Optional[exp.Expression]
3285        this = self._parse_bitwise()
3286
3287        if self._match(TokenType.USING):
3288            to = self.expression(exp.CharacterSet, this=self._parse_var())
3289        elif self._match(TokenType.COMMA):
3290            to = self._parse_bitwise()
3291        else:
3292            to = None
3293
3294        # Swap the argument order if needed to produce the correct AST
3295        if self.CONVERT_TYPE_FIRST:
3296            this, to = to, this
3297
3298        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3299
3300    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3301        args = self._parse_csv(self._parse_bitwise)
3302
3303        if self._match(TokenType.IN):
3304            return self.expression(
3305                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3306            )
3307
3308        if haystack_first:
3309            haystack = seq_get(args, 0)
3310            needle = seq_get(args, 1)
3311        else:
3312            needle = seq_get(args, 0)
3313            haystack = seq_get(args, 1)
3314
3315        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3316
3317        self.validate_expression(this, args)
3318
3319        return this
3320
3321    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3322        args = self._parse_csv(self._parse_table)
3323        return exp.JoinHint(this=func_name.upper(), expressions=args)
3324
3325    def _parse_substring(self) -> exp.Expression:
3326        # Postgres supports the form: substring(string [from int] [for int])
3327        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3328
3329        args = self._parse_csv(self._parse_bitwise)
3330
3331        if self._match(TokenType.FROM):
3332            args.append(self._parse_bitwise())
3333            if self._match(TokenType.FOR):
3334                args.append(self._parse_bitwise())
3335
3336        this = exp.Substring.from_arg_list(args)
3337        self.validate_expression(this, args)
3338
3339        return this
3340
3341    def _parse_trim(self) -> exp.Expression:
3342        # https://www.w3resource.com/sql/character-functions/trim.php
3343        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3344
3345        position = None
3346        collation = None
3347
3348        if self._match_set(self.TRIM_TYPES):
3349            position = self._prev.text.upper()
3350
3351        expression = self._parse_term()
3352        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3353            this = self._parse_term()
3354        else:
3355            this = expression
3356            expression = None
3357
3358        if self._match(TokenType.COLLATE):
3359            collation = self._parse_term()
3360
3361        return self.expression(
3362            exp.Trim,
3363            this=this,
3364            position=position,
3365            expression=expression,
3366            collation=collation,
3367        )
3368
3369    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3370        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3371
3372    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3373        return self._parse_window(self._parse_id_var(), alias=True)
3374
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of an expression.

        Consumes, in order: FILTER (WHERE ...), WITHIN GROUP (ORDER BY ...),
        IGNORE/RESPECT NULLS, and finally OVER (...) or a named window. When
        `alias` is True this is a named window definition (`name AS (...)`),
        so OVER is not required.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window instead of an inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE BETWEEN <start> AND <end>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3448
3449    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3450        self._match(TokenType.BETWEEN)
3451
3452        return {
3453            "value": (
3454                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3455            )
3456            or self._parse_bitwise(),
3457            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3458        }
3459
3460    def _parse_alias(
3461        self, this: t.Optional[exp.Expression], explicit: bool = False
3462    ) -> t.Optional[exp.Expression]:
3463        any_token = self._match(TokenType.ALIAS)
3464
3465        if explicit and not any_token:
3466            return this
3467
3468        if self._match(TokenType.L_PAREN):
3469            aliases = self.expression(
3470                exp.Aliases,
3471                this=this,
3472                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3473            )
3474            self._match_r_paren(aliases)
3475            return aliases
3476
3477        alias = self._parse_id_var(any_token)
3478
3479        if alias:
3480            return self.expression(exp.Alias, this=this, alias=alias)
3481
3482        return this
3483
3484    def _parse_id_var(
3485        self,
3486        any_token: bool = True,
3487        tokens: t.Optional[t.Collection[TokenType]] = None,
3488        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3489    ) -> t.Optional[exp.Expression]:
3490        identifier = self._parse_identifier()
3491
3492        if identifier:
3493            return identifier
3494
3495        prefix = ""
3496
3497        if prefix_tokens:
3498            while self._match_set(prefix_tokens):
3499                prefix += self._prev.text
3500
3501        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3502            quoted = self._prev.token_type == TokenType.STRING
3503            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3504
3505        return None
3506
3507    def _parse_string(self) -> t.Optional[exp.Expression]:
3508        if self._match(TokenType.STRING):
3509            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3510        return self._parse_placeholder()
3511
3512    def _parse_number(self) -> t.Optional[exp.Expression]:
3513        if self._match(TokenType.NUMBER):
3514            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3515        return self._parse_placeholder()
3516
3517    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3518        if self._match(TokenType.IDENTIFIER):
3519            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3520        return self._parse_placeholder()
3521
3522    def _parse_var(
3523        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3524    ) -> t.Optional[exp.Expression]:
3525        if (
3526            (any_token and self._advance_any())
3527            or self._match(TokenType.VAR)
3528            or (self._match_set(tokens) if tokens else False)
3529        ):
3530            return self.expression(exp.Var, this=self._prev.text)
3531        return self._parse_placeholder()
3532
3533    def _advance_any(self) -> t.Optional[Token]:
3534        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3535            self._advance()
3536            return self._prev
3537        return None
3538
3539    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3540        return self._parse_var() or self._parse_string()
3541
3542    def _parse_null(self) -> t.Optional[exp.Expression]:
3543        if self._match(TokenType.NULL):
3544            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3545        return None
3546
3547    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3548        if self._match(TokenType.TRUE):
3549            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3550        if self._match(TokenType.FALSE):
3551            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3552        return None
3553
3554    def _parse_star(self) -> t.Optional[exp.Expression]:
3555        if self._match(TokenType.STAR):
3556            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3557        return None
3558
3559    def _parse_parameter(self) -> exp.Expression:
3560        wrapped = self._match(TokenType.L_BRACE)
3561        this = self._parse_var() or self._parse_primary()
3562        self._match(TokenType.R_BRACE)
3563        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3564
3565    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3566        if self._match_set(self.PLACEHOLDER_PARSERS):
3567            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3568            if placeholder:
3569                return placeholder
3570            self._advance(-1)
3571        return None
3572
3573    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3574        if not self._match(TokenType.EXCEPT):
3575            return None
3576        if self._match(TokenType.L_PAREN, advance=False):
3577            return self._parse_wrapped_csv(self._parse_column)
3578        return self._parse_csv(self._parse_column)
3579
3580    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3581        if not self._match(TokenType.REPLACE):
3582            return None
3583        if self._match(TokenType.L_PAREN, advance=False):
3584            return self._parse_wrapped_csv(self._parse_expression)
3585        return self._parse_csv(self._parse_expression)
3586
3587    def _parse_csv(
3588        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3589    ) -> t.List[t.Optional[exp.Expression]]:
3590        parse_result = parse_method()
3591        items = [parse_result] if parse_result is not None else []
3592
3593        while self._match(sep):
3594            if parse_result and self._prev_comments:
3595                parse_result.comments = self._prev_comments
3596
3597            parse_result = parse_method()
3598            if parse_result is not None:
3599                items.append(parse_result)
3600
3601        return items
3602
3603    def _parse_tokens(
3604        self, parse_method: t.Callable, expressions: t.Dict
3605    ) -> t.Optional[exp.Expression]:
3606        this = parse_method()
3607
3608        while self._match_set(expressions):
3609            this = self.expression(
3610                expressions[self._prev.token_type],
3611                this=this,
3612                comments=self._prev_comments,
3613                expression=parse_method(),
3614            )
3615
3616        return this
3617
    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)
3620
3621    def _parse_wrapped_csv(
3622        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3623    ) -> t.List[t.Optional[exp.Expression]]:
3624        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3625
3626    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3627        self._match_l_paren()
3628        parse_result = parse_method()
3629        self._match_r_paren()
3630        return parse_result
3631
    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement if one follows, otherwise a plain expression."""
        return self._parse_select() or self._parse_expression()
3634
3635    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3636        return self._parse_set_operations(
3637            self._parse_select(nested=True, parse_subquery_alias=False)
3638        )
3639
3640    def _parse_transaction(self) -> exp.Expression:
3641        this = None
3642        if self._match_texts(self.TRANSACTION_KIND):
3643            this = self._prev.text
3644
3645        self._match_texts({"TRANSACTION", "WORK"})
3646
3647        modes = []
3648        while True:
3649            mode = []
3650            while self._match(TokenType.VAR):
3651                mode.append(self._prev.text)
3652
3653            if mode:
3654                modes.append(" ".join(mode))
3655            if not self._match(TokenType.COMMA):
3656                break
3657
3658        return self.expression(exp.Transaction, this=this, modes=modes)
3659
3660    def _parse_commit_or_rollback(self) -> exp.Expression:
3661        chain = None
3662        savepoint = None
3663        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3664
3665        self._match_texts({"TRANSACTION", "WORK"})
3666
3667        if self._match_text_seq("TO"):
3668            self._match_text_seq("SAVEPOINT")
3669            savepoint = self._parse_id_var()
3670
3671        if self._match(TokenType.AND):
3672            chain = not self._match_text_seq("NO")
3673            self._match_text_seq("CHAIN")
3674
3675        if is_rollback:
3676            return self.expression(exp.Rollback, savepoint=savepoint)
3677        return self.expression(exp.Commit, chain=chain)
3678
3679    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3680        if not self._match_text_seq("ADD"):
3681            return None
3682
3683        self._match(TokenType.COLUMN)
3684        exists_column = self._parse_exists(not_=True)
3685        expression = self._parse_column_def(self._parse_field(any_token=True))
3686
3687        if expression:
3688            expression.set("exists", exists_column)
3689
3690        return expression
3691
3692    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3693        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3694
3695    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3696    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3697        return self.expression(
3698            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3699        )
3700
3701    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3702        this = None
3703        kind = self._prev.token_type
3704
3705        if kind == TokenType.CONSTRAINT:
3706            this = self._parse_id_var()
3707
3708            if self._match_text_seq("CHECK"):
3709                expression = self._parse_wrapped(self._parse_conjunction)
3710                enforced = self._match_text_seq("ENFORCED")
3711
3712                return self.expression(
3713                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3714                )
3715
3716        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3717            expression = self._parse_foreign_key()
3718        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3719            expression = self._parse_primary_key()
3720
3721        return self.expression(exp.AddConstraint, this=this, expression=expression)
3722
3723    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
3724        index = self._index - 1
3725
3726        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
3727            return self._parse_csv(self._parse_add_constraint)
3728
3729        self._retreat(index)
3730        return self._parse_csv(self._parse_add_column)
3731
3732    def _parse_alter_table_alter(self) -> exp.Expression:
3733        self._match(TokenType.COLUMN)
3734        column = self._parse_field(any_token=True)
3735
3736        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
3737            return self.expression(exp.AlterColumn, this=column, drop=True)
3738        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
3739            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
3740
3741        self._match_text_seq("SET", "DATA")
3742        return self.expression(
3743            exp.AlterColumn,
3744            this=column,
3745            dtype=self._match_text_seq("TYPE") and self._parse_types(),
3746            collate=self._match(TokenType.COLLATE) and self._parse_term(),
3747            using=self._match(TokenType.USING) and self._parse_conjunction(),
3748        )
3749
3750    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
3751        index = self._index - 1
3752
3753        partition_exists = self._parse_exists()
3754        if self._match(TokenType.PARTITION, advance=False):
3755            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
3756
3757        self._retreat(index)
3758        return self._parse_csv(self._parse_drop_column)
3759
3760    def _parse_alter_table_rename(self) -> exp.Expression:
3761        self._match_text_seq("TO")
3762        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
3763
3764    def _parse_alter(self) -> t.Optional[exp.Expression]:
3765        start = self._prev
3766
3767        if not self._match(TokenType.TABLE):
3768            return self._parse_as_command(start)
3769
3770        exists = self._parse_exists()
3771        this = self._parse_table(schema=True)
3772
3773        if self._next:
3774            self._advance()
3775        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
3776
3777        if parser:
3778            return self.expression(
3779                exp.AlterTable,
3780                this=this,
3781                exists=exists,
3782                actions=ensure_list(parser(self)),
3783            )
3784        return self._parse_as_command(start)
3785
3786    def _parse_show(self) -> t.Optional[exp.Expression]:
3787        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3788        if parser:
3789            return parser(self)
3790        self._advance()
3791        return self.expression(exp.Show, this=self._prev.text.upper())
3792
3793    def _default_parse_set_item(self) -> exp.Expression:
3794        return self.expression(
3795            exp.SetItem,
3796            this=self._parse_statement(),
3797        )
3798
3799    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3800        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3801        return parser(self) if parser else self._default_parse_set_item()
3802
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement into an exp.Merge node.

        Consumes MERGE INTO <target> USING <source> ON <condition> followed by
        any number of WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND cond] THEN
        INSERT/UPDATE/DELETE clauses.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or no BY clause.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # Either INSERT * or INSERT (cols) VALUES (...).
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # Either UPDATE * or UPDATE SET col = expr, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
3868
3869    def _parse_set(self) -> exp.Expression:
3870        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3871
3872    def _parse_as_command(self, start: Token) -> exp.Command:
3873        while self._curr:
3874            self._advance()
3875        text = self._find_sql(start, self._prev)
3876        size = len(start.text)
3877        return exp.Command(this=text[:size], expression=text[size:])
3878
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a multi-word parser by walking `trie` with the upcoming tokens.

        On a full keyword-sequence match the tokens are left consumed and the
        matching parser is returned; otherwise the position is restored and
        None is returned.
        """
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # The accumulated prefix no longer matches anything.
                break
            if result == 2:
                # Full keyword sequence matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
3898
3899    def _match(self, token_type, advance=True):
3900        if not self._curr:
3901            return None
3902
3903        if self._curr.token_type == token_type:
3904            if advance:
3905                self._advance()
3906            return True
3907
3908        return None
3909
3910    def _match_set(self, types, advance=True):
3911        if not self._curr:
3912            return None
3913
3914        if self._curr.token_type in types:
3915            if advance:
3916                self._advance()
3917            return True
3918
3919        return None
3920
3921    def _match_pair(self, token_type_a, token_type_b, advance=True):
3922        if not self._curr or not self._next:
3923            return None
3924
3925        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3926            if advance:
3927                self._advance(2)
3928            return True
3929
3930        return None
3931
    def _match_l_paren(self, expression=None):
        """Require a '(' and move any attached comments onto `expression`."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3937
    def _match_r_paren(self, expression=None):
        """Require a ')' and move any attached comments onto `expression`."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3943
3944    def _match_texts(self, texts, advance=True):
3945        if self._curr and self._curr.text.upper() in texts:
3946            if advance:
3947                self._advance()
3948            return True
3949        return False
3950
3951    def _match_text_seq(self, *texts, advance=True):
3952        index = self._index
3953        for text in texts:
3954            if self._curr and self._curr.text.upper() == text:
3955                self._advance()
3956            else:
3957                self._retreat(index)
3958                return False
3959
3960        if not advance:
3961            self._retreat(index)
3962
3963        return True
3964
3965    def _replace_columns_with_dots(self, this):
3966        if isinstance(this, exp.Dot):
3967            exp.replace_children(this, self._replace_columns_with_dots)
3968        elif isinstance(this, exp.Column):
3969            exp.replace_children(this, self._replace_columns_with_dots)
3970            table = this.args.get("table")
3971            this = (
3972                self.expression(exp.Dot, this=table, expression=this.this)
3973                if table
3974                else self.expression(exp.Var, this=this.name)
3975            )
3976        elif isinstance(this, exp.Identifier):
3977            this = self.expression(exp.Var, this=this.name)
3978        return this
3979
3980    def _replace_lambda(self, node, lambda_variables):
3981        if isinstance(node, exp.Column):
3982            if node.name in lambda_variables:
3983                return node.this
3984        return node
def parse_var_map(args):
    """Build an ``exp.VarMap`` from an alternating ``[key, value, key, value, ...]`` list."""
    keys, values = [], []

    for index in range(0, len(args), 2):
        keys.append(args[index])
        values.append(args[index + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callback that builds *expr_type* over the right-hand bitwise expression."""

    def _parse_binary_range(self, this):
        node = self.expression(expr_type, this=this, expression=self._parse_bitwise())
        return self._parse_escape(node)

    return _parse_binary_range
class Parser:
  55class Parser(metaclass=_Parser):
  56    """
  57    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  58    a parsed syntax tree.
  59
  60    Args:
  61        error_level: the desired error level.
  62            Default: ErrorLevel.RAISE
  63        error_message_context: determines the amount of context to capture from a
  64            query string when displaying the error message (in number of characters).
  65            Default: 50.
  66        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  67            Default: 0
  68        alias_post_tablesample: If the table alias comes after tablesample.
  69            Default: False
  70        max_errors: Maximum number of error messages to include in a raised ParseError.
  71            This is only relevant if error_level is ErrorLevel.RAISE.
  72            Default: 3
  73        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  74            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  75            Default: "nulls_are_small"
  76    """
  77
  78    FUNCTIONS: t.Dict[str, t.Callable] = {
  79        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  80        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  81            this=seq_get(args, 0),
  82            to=exp.DataType(this=exp.DataType.Type.TEXT),
  83        ),
  84        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  85            this=seq_get(args, 0),
  86            to=exp.DataType(this=exp.DataType.Type.TEXT),
  87        ),
  88        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  89            this=exp.Cast(
  90                this=seq_get(args, 0),
  91                to=exp.DataType(this=exp.DataType.Type.TEXT),
  92            ),
  93            start=exp.Literal.number(1),
  94            length=exp.Literal.number(10),
  95        ),
  96        "VAR_MAP": parse_var_map,
  97        "IFNULL": exp.Coalesce.from_arg_list,
  98    }
  99
 100    NO_PAREN_FUNCTIONS = {
 101        TokenType.CURRENT_DATE: exp.CurrentDate,
 102        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 103        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 104    }
 105
 106    NESTED_TYPE_TOKENS = {
 107        TokenType.ARRAY,
 108        TokenType.MAP,
 109        TokenType.STRUCT,
 110        TokenType.NULLABLE,
 111    }
 112
 113    TYPE_TOKENS = {
 114        TokenType.BIT,
 115        TokenType.BOOLEAN,
 116        TokenType.TINYINT,
 117        TokenType.SMALLINT,
 118        TokenType.INT,
 119        TokenType.BIGINT,
 120        TokenType.FLOAT,
 121        TokenType.DOUBLE,
 122        TokenType.CHAR,
 123        TokenType.NCHAR,
 124        TokenType.VARCHAR,
 125        TokenType.NVARCHAR,
 126        TokenType.TEXT,
 127        TokenType.MEDIUMTEXT,
 128        TokenType.LONGTEXT,
 129        TokenType.MEDIUMBLOB,
 130        TokenType.LONGBLOB,
 131        TokenType.BINARY,
 132        TokenType.VARBINARY,
 133        TokenType.JSON,
 134        TokenType.JSONB,
 135        TokenType.INTERVAL,
 136        TokenType.TIME,
 137        TokenType.TIMESTAMP,
 138        TokenType.TIMESTAMPTZ,
 139        TokenType.TIMESTAMPLTZ,
 140        TokenType.DATETIME,
 141        TokenType.DATE,
 142        TokenType.DECIMAL,
 143        TokenType.UUID,
 144        TokenType.GEOGRAPHY,
 145        TokenType.GEOMETRY,
 146        TokenType.HLLSKETCH,
 147        TokenType.HSTORE,
 148        TokenType.PSEUDO_TYPE,
 149        TokenType.SUPER,
 150        TokenType.SERIAL,
 151        TokenType.SMALLSERIAL,
 152        TokenType.BIGSERIAL,
 153        TokenType.XML,
 154        TokenType.UNIQUEIDENTIFIER,
 155        TokenType.MONEY,
 156        TokenType.SMALLMONEY,
 157        TokenType.ROWVERSION,
 158        TokenType.IMAGE,
 159        TokenType.VARIANT,
 160        TokenType.OBJECT,
 161        TokenType.INET,
 162        *NESTED_TYPE_TOKENS,
 163    }
 164
 165    SUBQUERY_PREDICATES = {
 166        TokenType.ANY: exp.Any,
 167        TokenType.ALL: exp.All,
 168        TokenType.EXISTS: exp.Exists,
 169        TokenType.SOME: exp.Any,
 170    }
 171
 172    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 173
 174    DB_CREATABLES = {
 175        TokenType.DATABASE,
 176        TokenType.SCHEMA,
 177        TokenType.TABLE,
 178        TokenType.VIEW,
 179    }
 180
 181    CREATABLES = {
 182        TokenType.COLUMN,
 183        TokenType.FUNCTION,
 184        TokenType.INDEX,
 185        TokenType.PROCEDURE,
 186        *DB_CREATABLES,
 187    }
 188
 189    ID_VAR_TOKENS = {
 190        TokenType.VAR,
 191        TokenType.ANTI,
 192        TokenType.APPLY,
 193        TokenType.AUTO_INCREMENT,
 194        TokenType.BEGIN,
 195        TokenType.BOTH,
 196        TokenType.BUCKET,
 197        TokenType.CACHE,
 198        TokenType.CASCADE,
 199        TokenType.COLLATE,
 200        TokenType.COMMAND,
 201        TokenType.COMMENT,
 202        TokenType.COMMIT,
 203        TokenType.COMPOUND,
 204        TokenType.CONSTRAINT,
 205        TokenType.CURRENT_TIME,
 206        TokenType.DEFAULT,
 207        TokenType.DELETE,
 208        TokenType.DESCRIBE,
 209        TokenType.DIV,
 210        TokenType.END,
 211        TokenType.EXECUTE,
 212        TokenType.ESCAPE,
 213        TokenType.FALSE,
 214        TokenType.FIRST,
 215        TokenType.FILTER,
 216        TokenType.FOLLOWING,
 217        TokenType.FORMAT,
 218        TokenType.IF,
 219        TokenType.ISNULL,
 220        TokenType.INTERVAL,
 221        TokenType.LAZY,
 222        TokenType.LEADING,
 223        TokenType.LEFT,
 224        TokenType.LOCAL,
 225        TokenType.MATERIALIZED,
 226        TokenType.MERGE,
 227        TokenType.NATURAL,
 228        TokenType.NEXT,
 229        TokenType.OFFSET,
 230        TokenType.ONLY,
 231        TokenType.OPTIONS,
 232        TokenType.ORDINALITY,
 233        TokenType.PERCENT,
 234        TokenType.PIVOT,
 235        TokenType.PRECEDING,
 236        TokenType.RANGE,
 237        TokenType.REFERENCES,
 238        TokenType.RIGHT,
 239        TokenType.ROW,
 240        TokenType.ROWS,
 241        TokenType.SEED,
 242        TokenType.SEMI,
 243        TokenType.SET,
 244        TokenType.SHOW,
 245        TokenType.SORTKEY,
 246        TokenType.TEMPORARY,
 247        TokenType.TOP,
 248        TokenType.TRAILING,
 249        TokenType.TRUE,
 250        TokenType.UNBOUNDED,
 251        TokenType.UNIQUE,
 252        TokenType.UNLOGGED,
 253        TokenType.UNPIVOT,
 254        TokenType.VOLATILE,
 255        TokenType.WINDOW,
 256        *CREATABLES,
 257        *SUBQUERY_PREDICATES,
 258        *TYPE_TOKENS,
 259        *NO_PAREN_FUNCTIONS,
 260    }
 261
 262    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 263        TokenType.APPLY,
 264        TokenType.LEFT,
 265        TokenType.NATURAL,
 266        TokenType.OFFSET,
 267        TokenType.RIGHT,
 268        TokenType.WINDOW,
 269    }
 270
 271    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 272
 273    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 274
 275    FUNC_TOKENS = {
 276        TokenType.COMMAND,
 277        TokenType.CURRENT_DATE,
 278        TokenType.CURRENT_DATETIME,
 279        TokenType.CURRENT_TIMESTAMP,
 280        TokenType.CURRENT_TIME,
 281        TokenType.FILTER,
 282        TokenType.FIRST,
 283        TokenType.FORMAT,
 284        TokenType.IDENTIFIER,
 285        TokenType.INDEX,
 286        TokenType.ISNULL,
 287        TokenType.ILIKE,
 288        TokenType.LIKE,
 289        TokenType.MERGE,
 290        TokenType.OFFSET,
 291        TokenType.PRIMARY_KEY,
 292        TokenType.REPLACE,
 293        TokenType.ROW,
 294        TokenType.UNNEST,
 295        TokenType.VAR,
 296        TokenType.LEFT,
 297        TokenType.RIGHT,
 298        TokenType.DATE,
 299        TokenType.DATETIME,
 300        TokenType.TABLE,
 301        TokenType.TIMESTAMP,
 302        TokenType.TIMESTAMPTZ,
 303        TokenType.WINDOW,
 304        *TYPE_TOKENS,
 305        *SUBQUERY_PREDICATES,
 306    }
 307
 308    CONJUNCTION = {
 309        TokenType.AND: exp.And,
 310        TokenType.OR: exp.Or,
 311    }
 312
 313    EQUALITY = {
 314        TokenType.EQ: exp.EQ,
 315        TokenType.NEQ: exp.NEQ,
 316        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 317    }
 318
 319    COMPARISON = {
 320        TokenType.GT: exp.GT,
 321        TokenType.GTE: exp.GTE,
 322        TokenType.LT: exp.LT,
 323        TokenType.LTE: exp.LTE,
 324    }
 325
 326    BITWISE = {
 327        TokenType.AMP: exp.BitwiseAnd,
 328        TokenType.CARET: exp.BitwiseXor,
 329        TokenType.PIPE: exp.BitwiseOr,
 330        TokenType.DPIPE: exp.DPipe,
 331    }
 332
 333    TERM = {
 334        TokenType.DASH: exp.Sub,
 335        TokenType.PLUS: exp.Add,
 336        TokenType.MOD: exp.Mod,
 337        TokenType.COLLATE: exp.Collate,
 338    }
 339
 340    FACTOR = {
 341        TokenType.DIV: exp.IntDiv,
 342        TokenType.LR_ARROW: exp.Distance,
 343        TokenType.SLASH: exp.Div,
 344        TokenType.STAR: exp.Mul,
 345    }
 346
 347    TIMESTAMPS = {
 348        TokenType.TIME,
 349        TokenType.TIMESTAMP,
 350        TokenType.TIMESTAMPTZ,
 351        TokenType.TIMESTAMPLTZ,
 352    }
 353
 354    SET_OPERATIONS = {
 355        TokenType.UNION,
 356        TokenType.INTERSECT,
 357        TokenType.EXCEPT,
 358    }
 359
 360    JOIN_SIDES = {
 361        TokenType.LEFT,
 362        TokenType.RIGHT,
 363        TokenType.FULL,
 364    }
 365
 366    JOIN_KINDS = {
 367        TokenType.INNER,
 368        TokenType.OUTER,
 369        TokenType.CROSS,
 370        TokenType.SEMI,
 371        TokenType.ANTI,
 372    }
 373
 374    LAMBDAS = {
 375        TokenType.ARROW: lambda self, expressions: self.expression(
 376            exp.Lambda,
 377            this=self._parse_conjunction().transform(
 378                self._replace_lambda, {node.name for node in expressions}
 379            ),
 380            expressions=expressions,
 381        ),
 382        TokenType.FARROW: lambda self, expressions: self.expression(
 383            exp.Kwarg,
 384            this=exp.Var(this=expressions[0].name),
 385            expression=self._parse_conjunction(),
 386        ),
 387    }
 388
 389    COLUMN_OPERATORS = {
 390        TokenType.DOT: None,
 391        TokenType.DCOLON: lambda self, this, to: self.expression(
 392            exp.Cast,
 393            this=this,
 394            to=to,
 395        ),
 396        TokenType.ARROW: lambda self, this, path: self.expression(
 397            exp.JSONExtract,
 398            this=this,
 399            expression=path,
 400        ),
 401        TokenType.DARROW: lambda self, this, path: self.expression(
 402            exp.JSONExtractScalar,
 403            this=this,
 404            expression=path,
 405        ),
 406        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 407            exp.JSONBExtract,
 408            this=this,
 409            expression=path,
 410        ),
 411        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 412            exp.JSONBExtractScalar,
 413            this=this,
 414            expression=path,
 415        ),
 416        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 417            exp.JSONBContains,
 418            this=this,
 419            expression=key,
 420        ),
 421    }
 422
 423    EXPRESSION_PARSERS = {
 424        exp.Column: lambda self: self._parse_column(),
 425        exp.DataType: lambda self: self._parse_types(),
 426        exp.From: lambda self: self._parse_from(),
 427        exp.Group: lambda self: self._parse_group(),
 428        exp.Identifier: lambda self: self._parse_id_var(),
 429        exp.Lateral: lambda self: self._parse_lateral(),
 430        exp.Join: lambda self: self._parse_join(),
 431        exp.Order: lambda self: self._parse_order(),
 432        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 433        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 434        exp.Lambda: lambda self: self._parse_lambda(),
 435        exp.Limit: lambda self: self._parse_limit(),
 436        exp.Offset: lambda self: self._parse_offset(),
 437        exp.TableAlias: lambda self: self._parse_table_alias(),
 438        exp.Table: lambda self: self._parse_table(),
 439        exp.Condition: lambda self: self._parse_conjunction(),
 440        exp.Expression: lambda self: self._parse_statement(),
 441        exp.Properties: lambda self: self._parse_properties(),
 442        exp.Where: lambda self: self._parse_where(),
 443        exp.Ordered: lambda self: self._parse_ordered(),
 444        exp.Having: lambda self: self._parse_having(),
 445        exp.With: lambda self: self._parse_with(),
 446        exp.Window: lambda self: self._parse_named_window(),
 447        exp.Qualify: lambda self: self._parse_qualify(),
 448        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 449    }
 450
 451    STATEMENT_PARSERS = {
 452        TokenType.ALTER: lambda self: self._parse_alter(),
 453        TokenType.BEGIN: lambda self: self._parse_transaction(),
 454        TokenType.CACHE: lambda self: self._parse_cache(),
 455        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 456        TokenType.COMMENT: lambda self: self._parse_comment(),
 457        TokenType.CREATE: lambda self: self._parse_create(),
 458        TokenType.DELETE: lambda self: self._parse_delete(),
 459        TokenType.DESC: lambda self: self._parse_describe(),
 460        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 461        TokenType.DROP: lambda self: self._parse_drop(),
 462        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 463        TokenType.INSERT: lambda self: self._parse_insert(),
 464        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 465        TokenType.MERGE: lambda self: self._parse_merge(),
 466        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 467        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 468        TokenType.UPDATE: lambda self: self._parse_update(),
 469        TokenType.USE: lambda self: self.expression(
 470            exp.Use,
 471            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 472            and exp.Var(this=self._prev.text),
 473            this=self._parse_table(schema=False),
 474        ),
 475    }
 476
 477    UNARY_PARSERS = {
 478        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 479        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 480        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 481        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 482    }
 483
 484    PRIMARY_PARSERS = {
 485        TokenType.STRING: lambda self, token: self.expression(
 486            exp.Literal, this=token.text, is_string=True
 487        ),
 488        TokenType.NUMBER: lambda self, token: self.expression(
 489            exp.Literal, this=token.text, is_string=False
 490        ),
 491        TokenType.STAR: lambda self, _: self.expression(
 492            exp.Star,
 493            **{"except": self._parse_except(), "replace": self._parse_replace()},
 494        ),
 495        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 496        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 497        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 498        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 499        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 500        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 501        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 502        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 503        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 504    }
 505
 506    PLACEHOLDER_PARSERS = {
 507        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 508        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 509        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 510        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 511        else None,
 512    }
 513
 514    RANGE_PARSERS = {
 515        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 516        TokenType.GLOB: binary_range_parser(exp.Glob),
 517        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 518        TokenType.IN: lambda self, this: self._parse_in(this),
 519        TokenType.IS: lambda self, this: self._parse_is(this),
 520        TokenType.LIKE: binary_range_parser(exp.Like),
 521        TokenType.ILIKE: binary_range_parser(exp.ILike),
 522        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 523        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 524        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 525    }
 526
 527    PROPERTY_PARSERS = {
 528        "AFTER": lambda self: self._parse_afterjournal(
 529            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 530        ),
 531        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 532        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 533        "BEFORE": lambda self: self._parse_journal(
 534            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 535        ),
 536        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 537        "CHARACTER SET": lambda self: self._parse_character_set(),
 538        "CHECKSUM": lambda self: self._parse_checksum(),
 539        "CLUSTER BY": lambda self: self.expression(
 540            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 541        ),
 542        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 543        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 544        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 545            default=self._prev.text.upper() == "DEFAULT"
 546        ),
 547        "DEFINER": lambda self: self._parse_definer(),
 548        "DETERMINISTIC": lambda self: self.expression(
 549            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 550        ),
 551        "DISTKEY": lambda self: self._parse_distkey(),
 552        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 553        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 554        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 555        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 556        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 557        "FREESPACE": lambda self: self._parse_freespace(),
 558        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 559        "IMMUTABLE": lambda self: self.expression(
 560            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 561        ),
 562        "JOURNAL": lambda self: self._parse_journal(
 563            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 564        ),
 565        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 566        "LIKE": lambda self: self._parse_create_like(),
 567        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 568        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 569        "LOCK": lambda self: self._parse_locking(),
 570        "LOCKING": lambda self: self._parse_locking(),
 571        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 572        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 573        "MAX": lambda self: self._parse_datablocksize(),
 574        "MAXIMUM": lambda self: self._parse_datablocksize(),
 575        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 576            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 577        ),
 578        "MIN": lambda self: self._parse_datablocksize(),
 579        "MINIMUM": lambda self: self._parse_datablocksize(),
 580        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 581        "NO": lambda self: self._parse_noprimaryindex(),
 582        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 583        "ON": lambda self: self._parse_oncommit(),
 584        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 585        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 586        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 587        "RETURNS": lambda self: self._parse_returns(),
 588        "ROW": lambda self: self._parse_row(),
 589        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 590        "SORTKEY": lambda self: self._parse_sortkey(),
 591        "STABLE": lambda self: self.expression(
 592            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
 593        ),
 594        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 595        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 596        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 597        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 598        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 599        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 600        "VOLATILE": lambda self: self.expression(
 601            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
 602        ),
 603        "WITH": lambda self: self._parse_with_property(),
 604    }
 605
 606    CONSTRAINT_PARSERS = {
 607        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 608        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 609        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 610        "CHARACTER SET": lambda self: self.expression(
 611            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 612        ),
 613        "CHECK": lambda self: self.expression(
 614            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 615        ),
 616        "COLLATE": lambda self: self.expression(
 617            exp.CollateColumnConstraint, this=self._parse_var()
 618        ),
 619        "COMMENT": lambda self: self.expression(
 620            exp.CommentColumnConstraint, this=self._parse_string()
 621        ),
 622        "COMPRESS": lambda self: self._parse_compress(),
 623        "DEFAULT": lambda self: self.expression(
 624            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 625        ),
 626        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 627        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 628        "FORMAT": lambda self: self.expression(
 629            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 630        ),
 631        "GENERATED": lambda self: self._parse_generated_as_identity(),
 632        "IDENTITY": lambda self: self._parse_auto_increment(),
 633        "INLINE": lambda self: self._parse_inline(),
 634        "LIKE": lambda self: self._parse_create_like(),
 635        "NOT": lambda self: self._parse_not_constraint(),
 636        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 637        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 638        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 639        "TITLE": lambda self: self.expression(
 640            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 641        ),
 642        "UNIQUE": lambda self: self._parse_unique(),
 643        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 644    }
 645
 646    ALTER_PARSERS = {
 647        "ADD": lambda self: self._parse_alter_table_add(),
 648        "ALTER": lambda self: self._parse_alter_table_alter(),
 649        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 650        "DROP": lambda self: self._parse_alter_table_drop(),
 651        "RENAME": lambda self: self._parse_alter_table_rename(),
 652    }
 653
 654    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 655
 656    NO_PAREN_FUNCTION_PARSERS = {
 657        TokenType.CASE: lambda self: self._parse_case(),
 658        TokenType.IF: lambda self: self._parse_if(),
 659        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 660    }
 661
 662    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 663        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 664        "TRY_CONVERT": lambda self: self._parse_convert(False),
 665        "EXTRACT": lambda self: self._parse_extract(),
 666        "POSITION": lambda self: self._parse_position(),
 667        "SUBSTRING": lambda self: self._parse_substring(),
 668        "TRIM": lambda self: self._parse_trim(),
 669        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 670        "TRY_CAST": lambda self: self._parse_cast(False),
 671        "STRING_AGG": lambda self: self._parse_string_agg(),
 672    }
 673
 674    QUERY_MODIFIER_PARSERS = {
 675        "match": lambda self: self._parse_match_recognize(),
 676        "where": lambda self: self._parse_where(),
 677        "group": lambda self: self._parse_group(),
 678        "having": lambda self: self._parse_having(),
 679        "qualify": lambda self: self._parse_qualify(),
 680        "windows": lambda self: self._parse_window_clause(),
 681        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
 682        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 683        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 684        "order": lambda self: self._parse_order(),
 685        "limit": lambda self: self._parse_limit(),
 686        "offset": lambda self: self._parse_offset(),
 687        "lock": lambda self: self._parse_lock(),
 688        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 689    }
 690
 691    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 692    SET_PARSERS: t.Dict[str, t.Callable] = {}
 693
 694    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 695
 696    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 697
 698    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 699
 700    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 701
 702    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 703
 704    STRICT_CAST = True
 705
 706    INTEGER_DIVISION = True
 707
 708    CONVERT_TYPE_FIRST = False
 709
 710    __slots__ = (
 711        "error_level",
 712        "error_message_context",
 713        "sql",
 714        "errors",
 715        "index_offset",
 716        "unnest_column_only",
 717        "alias_post_tablesample",
 718        "max_errors",
 719        "null_ordering",
 720        "_tokens",
 721        "_index",
 722        "_curr",
 723        "_next",
 724        "_prev",
 725        "_prev_comments",
 726        "_show_trie",
 727        "_set_trie",
 728    )
 729
 730    def __init__(
 731        self,
 732        error_level: t.Optional[ErrorLevel] = None,
 733        error_message_context: int = 100,
 734        index_offset: int = 0,
 735        unnest_column_only: bool = False,
 736        alias_post_tablesample: bool = False,
 737        max_errors: int = 3,
 738        null_ordering: t.Optional[str] = None,
 739    ):
 740        self.error_level = error_level or ErrorLevel.IMMEDIATE
 741        self.error_message_context = error_message_context
 742        self.index_offset = index_offset
 743        self.unnest_column_only = unnest_column_only
 744        self.alias_post_tablesample = alias_post_tablesample
 745        self.max_errors = max_errors
 746        self.null_ordering = null_ordering
 747        self.reset()
 748
 749    def reset(self):
 750        self.sql = ""
 751        self.errors = []
 752        self._tokens = []
 753        self._index = 0
 754        self._curr = None
 755        self._next = None
 756        self._prev = None
 757        self._prev_comments = None
 758
 759    def parse(
 760        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 761    ) -> t.List[t.Optional[exp.Expression]]:
 762        """
 763        Parses a list of tokens and returns a list of syntax trees, one tree
 764        per parsed SQL statement.
 765
 766        Args:
 767            raw_tokens: the list of tokens.
 768            sql: the original SQL string, used to produce helpful debug messages.
 769
 770        Returns:
 771            The list of syntax trees.
 772        """
 773        return self._parse(
 774            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 775        )
 776
 777    def parse_into(
 778        self,
 779        expression_types: exp.IntoType,
 780        raw_tokens: t.List[Token],
 781        sql: t.Optional[str] = None,
 782    ) -> t.List[t.Optional[exp.Expression]]:
 783        """
 784        Parses a list of tokens into a given Expression type. If a collection of Expression
 785        types is given instead, this method will try to parse the token list into each one
 786        of them, stopping at the first for which the parsing succeeds.
 787
 788        Args:
 789            expression_types: the expression type(s) to try and parse the token list into.
 790            raw_tokens: the list of tokens.
 791            sql: the original SQL string, used to produce helpful debug messages.
 792
 793        Returns:
 794            The target Expression.
 795        """
 796        errors = []
 797        for expression_type in ensure_collection(expression_types):
 798            parser = self.EXPRESSION_PARSERS.get(expression_type)
 799            if not parser:
 800                raise TypeError(f"No parser registered for {expression_type}")
 801            try:
 802                return self._parse(parser, raw_tokens, sql)
 803            except ParseError as e:
 804                e.errors[0]["into_expression"] = expression_type
 805                errors.append(e)
 806        raise ParseError(
 807            f"Failed to parse into {expression_types}",
 808            errors=merge_errors(errors),
 809        ) from errors[-1]
 810
 811    def _parse(
 812        self,
 813        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 814        raw_tokens: t.List[Token],
 815        sql: t.Optional[str] = None,
 816    ) -> t.List[t.Optional[exp.Expression]]:
 817        self.reset()
 818        self.sql = sql or ""
 819        total = len(raw_tokens)
 820        chunks: t.List[t.List[Token]] = [[]]
 821
 822        for i, token in enumerate(raw_tokens):
 823            if token.token_type == TokenType.SEMICOLON:
 824                if i < total - 1:
 825                    chunks.append([])
 826            else:
 827                chunks[-1].append(token)
 828
 829        expressions = []
 830
 831        for tokens in chunks:
 832            self._index = -1
 833            self._tokens = tokens
 834            self._advance()
 835
 836            expressions.append(parse_method(self))
 837
 838            if self._index < len(self._tokens):
 839                self.raise_error("Invalid expression / Unexpected token")
 840
 841            self.check_errors()
 842
 843        return expressions
 844
 845    def check_errors(self) -> None:
 846        """
 847        Logs or raises any found errors, depending on the chosen error level setting.
 848        """
 849        if self.error_level == ErrorLevel.WARN:
 850            for error in self.errors:
 851                logger.error(str(error))
 852        elif self.error_level == ErrorLevel.RAISE and self.errors:
 853            raise ParseError(
 854                concat_messages(self.errors, self.max_errors),
 855                errors=merge_errors(self.errors),
 856            )
 857
 858    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 859        """
 860        Appends an error in the list of recorded errors or raises it, depending on the chosen
 861        error level setting.
 862        """
 863        token = token or self._curr or self._prev or Token.string("")
 864        start = self._find_token(token)
 865        end = start + len(token.text)
 866        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 867        highlight = self.sql[start:end]
 868        end_context = self.sql[end : end + self.error_message_context]
 869
 870        error = ParseError.new(
 871            f"{message}. Line {token.line}, Col: {token.col}.\n"
 872            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 873            description=message,
 874            line=token.line,
 875            col=token.col,
 876            start_context=start_context,
 877            highlight=highlight,
 878            end_context=end_context,
 879        )
 880
 881        if self.error_level == ErrorLevel.IMMEDIATE:
 882            raise error
 883
 884        self.errors.append(error)
 885
 886    def expression(
 887        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 888    ) -> exp.Expression:
 889        """
 890        Creates a new, validated Expression.
 891
 892        Args:
 893            exp_class: the expression class to instantiate.
 894            comments: an optional list of comments to attach to the expression.
 895            kwargs: the arguments to set for the expression along with their respective values.
 896
 897        Returns:
 898            The target expression.
 899        """
 900        instance = exp_class(**kwargs)
 901        if self._prev_comments:
 902            instance.comments = self._prev_comments
 903            self._prev_comments = None
 904        if comments:
 905            instance.comments = comments
 906        self.validate_expression(instance)
 907        return instance
 908
 909    def validate_expression(
 910        self, expression: exp.Expression, args: t.Optional[t.List] = None
 911    ) -> None:
 912        """
 913        Validates an already instantiated expression, making sure that all its mandatory arguments
 914        are set.
 915
 916        Args:
 917            expression: the expression to validate.
 918            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 919        """
 920        if self.error_level == ErrorLevel.IGNORE:
 921            return
 922
 923        for error_message in expression.error_messages(args):
 924            self.raise_error(error_message)
 925
 926    def _find_sql(self, start: Token, end: Token) -> str:
 927        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 928
 929    def _find_token(self, token: Token) -> int:
 930        line = 1
 931        col = 1
 932        index = 0
 933
 934        while line < token.line or col < token.col:
 935            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 936                line += 1
 937                col = 1
 938            else:
 939                col += 1
 940            index += 1
 941
 942        return index
 943
 944    def _advance(self, times: int = 1) -> None:
 945        self._index += times
 946        self._curr = seq_get(self._tokens, self._index)
 947        self._next = seq_get(self._tokens, self._index + 1)
 948        if self._index > 0:
 949            self._prev = self._tokens[self._index - 1]
 950            self._prev_comments = self._prev.comments
 951        else:
 952            self._prev = None
 953            self._prev_comments = None
 954
 955    def _retreat(self, index: int) -> None:
 956        if index != self._index:
 957            self._advance(index - self._index)
 958
 959    def _parse_command(self) -> exp.Expression:
 960        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 961
 962    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
 963        start = self._prev
 964        exists = self._parse_exists() if allow_exists else None
 965
 966        self._match(TokenType.ON)
 967
 968        kind = self._match_set(self.CREATABLES) and self._prev
 969
 970        if not kind:
 971            return self._parse_as_command(start)
 972
 973        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
 974            this = self._parse_user_defined_function(kind=kind.token_type)
 975        elif kind.token_type == TokenType.TABLE:
 976            this = self._parse_table()
 977        elif kind.token_type == TokenType.COLUMN:
 978            this = self._parse_column()
 979        else:
 980            this = self._parse_id_var()
 981
 982        self._match(TokenType.IS)
 983
 984        return self.expression(
 985            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
 986        )
 987
 988    def _parse_statement(self) -> t.Optional[exp.Expression]:
 989        if self._curr is None:
 990            return None
 991
 992        if self._match_set(self.STATEMENT_PARSERS):
 993            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 994
 995        if self._match_set(Tokenizer.COMMANDS):
 996            return self._parse_command()
 997
 998        expression = self._parse_expression()
 999        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1000
1001        self._parse_query_modifiers(expression)
1002        return expression
1003
1004    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
1005        start = self._prev
1006        temporary = self._match(TokenType.TEMPORARY)
1007        materialized = self._match(TokenType.MATERIALIZED)
1008        kind = self._match_set(self.CREATABLES) and self._prev.text
1009        if not kind:
1010            if default_kind:
1011                kind = default_kind
1012            else:
1013                return self._parse_as_command(start)
1014
1015        return self.expression(
1016            exp.Drop,
1017            exists=self._parse_exists(),
1018            this=self._parse_table(schema=True),
1019            kind=kind,
1020            temporary=temporary,
1021            materialized=materialized,
1022            cascade=self._match(TokenType.CASCADE),
1023        )
1024
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Match an optional `IF [NOT] EXISTS` clause.

        Args:
            not_: when True, the NOT keyword is also required (`IF NOT EXISTS`).

        Returns:
            A truthy value iff the full clause was matched. Note the value comes
            from the short-circuit chain of `_match` calls, so a partial match
            still consumes the tokens matched so far.
        """
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1031
1032    def _parse_create(self) -> t.Optional[exp.Expression]:
1033        start = self._prev
1034        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1035            TokenType.OR, TokenType.REPLACE
1036        )
1037        unique = self._match(TokenType.UNIQUE)
1038        volatile = self._match(TokenType.VOLATILE)
1039
1040        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1041            self._match(TokenType.TABLE)
1042
1043        properties = None
1044        create_token = self._match_set(self.CREATABLES) and self._prev
1045
1046        if not create_token:
1047            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1048            create_token = self._match_set(self.CREATABLES) and self._prev
1049
1050            if not properties or not create_token:
1051                return self._parse_as_command(start)
1052
1053        exists = self._parse_exists(not_=True)
1054        this = None
1055        expression = None
1056        indexes = None
1057        no_schema_binding = None
1058        begin = None
1059
1060        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1061            this = self._parse_user_defined_function(kind=create_token.token_type)
1062            temp_properties = self._parse_properties()
1063            if properties and temp_properties:
1064                properties.expressions.extend(temp_properties.expressions)
1065            elif temp_properties:
1066                properties = temp_properties
1067
1068            self._match(TokenType.ALIAS)
1069            begin = self._match(TokenType.BEGIN)
1070            return_ = self._match_text_seq("RETURN")
1071            expression = self._parse_statement()
1072
1073            if return_:
1074                expression = self.expression(exp.Return, this=expression)
1075        elif create_token.token_type == TokenType.INDEX:
1076            this = self._parse_index()
1077        elif create_token.token_type in self.DB_CREATABLES:
1078            table_parts = self._parse_table_parts(schema=True)
1079
1080            # exp.Properties.Location.POST_NAME
1081            if self._match(TokenType.COMMA):
1082                temp_properties = self._parse_properties(before=True)
1083                if properties and temp_properties:
1084                    properties.expressions.extend(temp_properties.expressions)
1085                elif temp_properties:
1086                    properties = temp_properties
1087
1088            this = self._parse_schema(this=table_parts)
1089
1090            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1091            temp_properties = self._parse_properties()
1092            if properties and temp_properties:
1093                properties.expressions.extend(temp_properties.expressions)
1094            elif temp_properties:
1095                properties = temp_properties
1096
1097            self._match(TokenType.ALIAS)
1098
1099            # exp.Properties.Location.POST_ALIAS
1100            if not (
1101                self._match(TokenType.SELECT, advance=False)
1102                or self._match(TokenType.WITH, advance=False)
1103                or self._match(TokenType.L_PAREN, advance=False)
1104            ):
1105                temp_properties = self._parse_properties()
1106                if properties and temp_properties:
1107                    properties.expressions.extend(temp_properties.expressions)
1108                elif temp_properties:
1109                    properties = temp_properties
1110
1111            expression = self._parse_ddl_select()
1112
1113            if create_token.token_type == TokenType.TABLE:
1114                # exp.Properties.Location.POST_EXPRESSION
1115                temp_properties = self._parse_properties()
1116                if properties and temp_properties:
1117                    properties.expressions.extend(temp_properties.expressions)
1118                elif temp_properties:
1119                    properties = temp_properties
1120
1121                indexes = []
1122                while True:
1123                    index = self._parse_create_table_index()
1124
1125                    # exp.Properties.Location.POST_INDEX
1126                    if self._match(TokenType.PARTITION_BY, advance=False):
1127                        temp_properties = self._parse_properties()
1128                        if properties and temp_properties:
1129                            properties.expressions.extend(temp_properties.expressions)
1130                        elif temp_properties:
1131                            properties = temp_properties
1132
1133                    if not index:
1134                        break
1135                    else:
1136                        indexes.append(index)
1137            elif create_token.token_type == TokenType.VIEW:
1138                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1139                    no_schema_binding = True
1140
1141        return self.expression(
1142            exp.Create,
1143            this=this,
1144            kind=create_token.text,
1145            replace=replace,
1146            unique=unique,
1147            volatile=volatile,
1148            expression=expression,
1149            exists=exists,
1150            properties=properties,
1151            indexes=indexes,
1152            no_schema_binding=no_schema_binding,
1153            begin=begin,
1154        )
1155
1156    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1157        self._match(TokenType.COMMA)
1158
1159        # parsers look to _prev for no/dual/default, so need to consume first
1160        self._match_text_seq("NO")
1161        self._match_text_seq("DUAL")
1162        self._match_text_seq("DEFAULT")
1163
1164        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1165            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1166
1167        return None
1168
1169    def _parse_property(self) -> t.Optional[exp.Expression]:
1170        if self._match_texts(self.PROPERTY_PARSERS):
1171            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1172
1173        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1174            return self._parse_character_set(default=True)
1175
1176        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1177            return self._parse_sortkey(compound=True)
1178
1179        if self._match_text_seq("SQL", "SECURITY"):
1180            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1181
1182        assignment = self._match_pair(
1183            TokenType.VAR, TokenType.EQ, advance=False
1184        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1185
1186        if assignment:
1187            key = self._parse_var_or_string()
1188            self._match(TokenType.EQ)
1189            return self.expression(exp.Property, this=key, value=self._parse_column())
1190
1191        return None
1192
1193    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1194        self._match(TokenType.EQ)
1195        self._match(TokenType.ALIAS)
1196        return self.expression(
1197            exp_class,
1198            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1199        )
1200
1201    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1202        properties = []
1203
1204        while True:
1205            if before:
1206                identified_property = self._parse_property_before()
1207            else:
1208                identified_property = self._parse_property()
1209
1210            if not identified_property:
1211                break
1212            for p in ensure_collection(identified_property):
1213                properties.append(p)
1214
1215        if properties:
1216            return self.expression(exp.Properties, expressions=properties)
1217
1218        return None
1219
1220    def _parse_fallback(self, no=False) -> exp.Expression:
1221        self._match_text_seq("FALLBACK")
1222        return self.expression(
1223            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1224        )
1225
1226    def _parse_with_property(
1227        self,
1228    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1229        self._match(TokenType.WITH)
1230        if self._match(TokenType.L_PAREN, advance=False):
1231            return self._parse_wrapped_csv(self._parse_property)
1232
1233        if self._match_text_seq("JOURNAL"):
1234            return self._parse_withjournaltable()
1235
1236        if self._match_text_seq("DATA"):
1237            return self._parse_withdata(no=False)
1238        elif self._match_text_seq("NO", "DATA"):
1239            return self._parse_withdata(no=True)
1240
1241        if not self._next:
1242            return None
1243
1244        return self._parse_withisolatedloading()
1245
1246    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1247    def _parse_definer(self) -> t.Optional[exp.Expression]:
1248        self._match(TokenType.EQ)
1249
1250        user = self._parse_id_var()
1251        self._match(TokenType.PARAMETER)
1252        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1253
1254        if not user or not host:
1255            return None
1256
1257        return exp.DefinerProperty(this=f"{user}@{host}")
1258
1259    def _parse_withjournaltable(self) -> exp.Expression:
1260        self._match(TokenType.TABLE)
1261        self._match(TokenType.EQ)
1262        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1263
1264    def _parse_log(self, no=False) -> exp.Expression:
1265        self._match_text_seq("LOG")
1266        return self.expression(exp.LogProperty, no=no)
1267
1268    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1269        before = self._match_text_seq("BEFORE")
1270        self._match_text_seq("JOURNAL")
1271        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1272
1273    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1274        self._match_text_seq("NOT")
1275        self._match_text_seq("LOCAL")
1276        self._match_text_seq("AFTER", "JOURNAL")
1277        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1278
1279    def _parse_checksum(self) -> exp.Expression:
1280        self._match_text_seq("CHECKSUM")
1281        self._match(TokenType.EQ)
1282
1283        on = None
1284        if self._match(TokenType.ON):
1285            on = True
1286        elif self._match_text_seq("OFF"):
1287            on = False
1288        default = self._match(TokenType.DEFAULT)
1289
1290        return self.expression(
1291            exp.ChecksumProperty,
1292            on=on,
1293            default=default,
1294        )
1295
1296    def _parse_freespace(self) -> exp.Expression:
1297        self._match_text_seq("FREESPACE")
1298        self._match(TokenType.EQ)
1299        return self.expression(
1300            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1301        )
1302
1303    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1304        self._match_text_seq("MERGEBLOCKRATIO")
1305        if self._match(TokenType.EQ):
1306            return self.expression(
1307                exp.MergeBlockRatioProperty,
1308                this=self._parse_number(),
1309                percent=self._match(TokenType.PERCENT),
1310            )
1311        else:
1312            return self.expression(
1313                exp.MergeBlockRatioProperty,
1314                no=no,
1315                default=default,
1316            )
1317
1318    def _parse_datablocksize(self, default=None) -> exp.Expression:
1319        if default:
1320            self._match_text_seq("DATABLOCKSIZE")
1321            return self.expression(exp.DataBlocksizeProperty, default=True)
1322        elif self._match_texts(("MIN", "MINIMUM")):
1323            self._match_text_seq("DATABLOCKSIZE")
1324            return self.expression(exp.DataBlocksizeProperty, min=True)
1325        elif self._match_texts(("MAX", "MAXIMUM")):
1326            self._match_text_seq("DATABLOCKSIZE")
1327            return self.expression(exp.DataBlocksizeProperty, min=False)
1328
1329        self._match_text_seq("DATABLOCKSIZE")
1330        self._match(TokenType.EQ)
1331        size = self._parse_number()
1332        units = None
1333        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1334            units = self._prev.text
1335        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1336
1337    def _parse_blockcompression(self) -> exp.Expression:
1338        self._match_text_seq("BLOCKCOMPRESSION")
1339        self._match(TokenType.EQ)
1340        always = self._match_text_seq("ALWAYS")
1341        manual = self._match_text_seq("MANUAL")
1342        never = self._match_text_seq("NEVER")
1343        default = self._match_text_seq("DEFAULT")
1344        autotemp = None
1345        if self._match_text_seq("AUTOTEMP"):
1346            autotemp = self._parse_schema()
1347
1348        return self.expression(
1349            exp.BlockCompressionProperty,
1350            always=always,
1351            manual=manual,
1352            never=never,
1353            default=default,
1354            autotemp=autotemp,
1355        )
1356
1357    def _parse_withisolatedloading(self) -> exp.Expression:
1358        no = self._match_text_seq("NO")
1359        concurrent = self._match_text_seq("CONCURRENT")
1360        self._match_text_seq("ISOLATED", "LOADING")
1361        for_all = self._match_text_seq("FOR", "ALL")
1362        for_insert = self._match_text_seq("FOR", "INSERT")
1363        for_none = self._match_text_seq("FOR", "NONE")
1364        return self.expression(
1365            exp.IsolatedLoadingProperty,
1366            no=no,
1367            concurrent=concurrent,
1368            for_all=for_all,
1369            for_insert=for_insert,
1370            for_none=for_none,
1371        )
1372
1373    def _parse_locking(self) -> exp.Expression:
1374        if self._match(TokenType.TABLE):
1375            kind = "TABLE"
1376        elif self._match(TokenType.VIEW):
1377            kind = "VIEW"
1378        elif self._match(TokenType.ROW):
1379            kind = "ROW"
1380        elif self._match_text_seq("DATABASE"):
1381            kind = "DATABASE"
1382        else:
1383            kind = None
1384
1385        if kind in ("DATABASE", "TABLE", "VIEW"):
1386            this = self._parse_table_parts()
1387        else:
1388            this = None
1389
1390        if self._match(TokenType.FOR):
1391            for_or_in = "FOR"
1392        elif self._match(TokenType.IN):
1393            for_or_in = "IN"
1394        else:
1395            for_or_in = None
1396
1397        if self._match_text_seq("ACCESS"):
1398            lock_type = "ACCESS"
1399        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1400            lock_type = "EXCLUSIVE"
1401        elif self._match_text_seq("SHARE"):
1402            lock_type = "SHARE"
1403        elif self._match_text_seq("READ"):
1404            lock_type = "READ"
1405        elif self._match_text_seq("WRITE"):
1406            lock_type = "WRITE"
1407        elif self._match_text_seq("CHECKSUM"):
1408            lock_type = "CHECKSUM"
1409        else:
1410            lock_type = None
1411
1412        override = self._match_text_seq("OVERRIDE")
1413
1414        return self.expression(
1415            exp.LockingProperty,
1416            this=this,
1417            kind=kind,
1418            for_or_in=for_or_in,
1419            lock_type=lock_type,
1420            override=override,
1421        )
1422
1423    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1424        if self._match(TokenType.PARTITION_BY):
1425            return self._parse_csv(self._parse_conjunction)
1426        return []
1427
1428    def _parse_partitioned_by(self) -> exp.Expression:
1429        self._match(TokenType.EQ)
1430        return self.expression(
1431            exp.PartitionedByProperty,
1432            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1433        )
1434
1435    def _parse_withdata(self, no=False) -> exp.Expression:
1436        if self._match_text_seq("AND", "STATISTICS"):
1437            statistics = True
1438        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1439            statistics = False
1440        else:
1441            statistics = None
1442
1443        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1444
1445    def _parse_noprimaryindex(self) -> exp.Expression:
1446        self._match_text_seq("PRIMARY", "INDEX")
1447        return exp.NoPrimaryIndexProperty()
1448
1449    def _parse_oncommit(self) -> exp.Expression:
1450        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1451        return exp.OnCommitProperty()
1452
1453    def _parse_distkey(self) -> exp.Expression:
1454        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1455
1456    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1457        table = self._parse_table(schema=True)
1458        options = []
1459        while self._match_texts(("INCLUDING", "EXCLUDING")):
1460            this = self._prev.text.upper()
1461            id_var = self._parse_id_var()
1462
1463            if not id_var:
1464                return None
1465
1466            options.append(
1467                self.expression(
1468                    exp.Property,
1469                    this=this,
1470                    value=exp.Var(this=id_var.this.upper()),
1471                )
1472            )
1473        return self.expression(exp.LikeProperty, this=table, expressions=options)
1474
1475    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1476        return self.expression(
1477            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1478        )
1479
1480    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1481        self._match(TokenType.EQ)
1482        return self.expression(
1483            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1484        )
1485
1486    def _parse_returns(self) -> exp.Expression:
1487        value: t.Optional[exp.Expression]
1488        is_table = self._match(TokenType.TABLE)
1489
1490        if is_table:
1491            if self._match(TokenType.LT):
1492                value = self.expression(
1493                    exp.Schema,
1494                    this="TABLE",
1495                    expressions=self._parse_csv(self._parse_struct_kwargs),
1496                )
1497                if not self._match(TokenType.GT):
1498                    self.raise_error("Expecting >")
1499            else:
1500                value = self._parse_schema(exp.Var(this="TABLE"))
1501        else:
1502            value = self._parse_types()
1503
1504        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1505
1506    def _parse_temporary(self, global_=False) -> exp.Expression:
1507        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1508        return self.expression(exp.TemporaryProperty, global_=global_)
1509
1510    def _parse_describe(self) -> exp.Expression:
1511        kind = self._match_set(self.CREATABLES) and self._prev.text
1512        this = self._parse_table()
1513
1514        return self.expression(exp.Describe, this=this, kind=kind)
1515
1516    def _parse_insert(self) -> exp.Expression:
1517        overwrite = self._match(TokenType.OVERWRITE)
1518        local = self._match(TokenType.LOCAL)
1519        alternative = None
1520
1521        if self._match_text_seq("DIRECTORY"):
1522            this: t.Optional[exp.Expression] = self.expression(
1523                exp.Directory,
1524                this=self._parse_var_or_string(),
1525                local=local,
1526                row_format=self._parse_row_format(match_row=True),
1527            )
1528        else:
1529            if self._match(TokenType.OR):
1530                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1531
1532            self._match(TokenType.INTO)
1533            self._match(TokenType.TABLE)
1534            this = self._parse_table(schema=True)
1535
1536        return self.expression(
1537            exp.Insert,
1538            this=this,
1539            exists=self._parse_exists(),
1540            partition=self._parse_partition(),
1541            expression=self._parse_ddl_select(),
1542            returning=self._parse_returning(),
1543            overwrite=overwrite,
1544            alternative=alternative,
1545        )
1546
1547    def _parse_returning(self) -> t.Optional[exp.Expression]:
1548        if not self._match(TokenType.RETURNING):
1549            return None
1550
1551        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1552
1553    def _parse_row(self) -> t.Optional[exp.Expression]:
1554        if not self._match(TokenType.FORMAT):
1555            return None
1556        return self._parse_row_format()
1557
1558    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1559        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1560            return None
1561
1562        if self._match_text_seq("SERDE"):
1563            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1564
1565        self._match_text_seq("DELIMITED")
1566
1567        kwargs = {}
1568
1569        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1570            kwargs["fields"] = self._parse_string()
1571            if self._match_text_seq("ESCAPED", "BY"):
1572                kwargs["escaped"] = self._parse_string()
1573        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1574            kwargs["collection_items"] = self._parse_string()
1575        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1576            kwargs["map_keys"] = self._parse_string()
1577        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1578            kwargs["lines"] = self._parse_string()
1579        if self._match_text_seq("NULL", "DEFINED", "AS"):
1580            kwargs["null"] = self._parse_string()
1581
1582        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1583
1584    def _parse_load_data(self) -> exp.Expression:
1585        local = self._match(TokenType.LOCAL)
1586        self._match_text_seq("INPATH")
1587        inpath = self._parse_string()
1588        overwrite = self._match(TokenType.OVERWRITE)
1589        self._match_pair(TokenType.INTO, TokenType.TABLE)
1590
1591        return self.expression(
1592            exp.LoadData,
1593            this=self._parse_table(schema=True),
1594            local=local,
1595            overwrite=overwrite,
1596            inpath=inpath,
1597            partition=self._parse_partition(),
1598            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1599            serde=self._match_text_seq("SERDE") and self._parse_string(),
1600        )
1601
1602    def _parse_delete(self) -> exp.Expression:
1603        self._match(TokenType.FROM)
1604
1605        return self.expression(
1606            exp.Delete,
1607            this=self._parse_table(schema=True),
1608            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1609            where=self._parse_where(),
1610            returning=self._parse_returning(),
1611        )
1612
1613    def _parse_update(self) -> exp.Expression:
1614        return self.expression(
1615            exp.Update,
1616            **{  # type: ignore
1617                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1618                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1619                "from": self._parse_from(),
1620                "where": self._parse_where(),
1621                "returning": self._parse_returning(),
1622            },
1623        )
1624
1625    def _parse_uncache(self) -> exp.Expression:
1626        if not self._match(TokenType.TABLE):
1627            self.raise_error("Expecting TABLE after UNCACHE")
1628
1629        return self.expression(
1630            exp.Uncache,
1631            exists=self._parse_exists(),
1632            this=self._parse_table(schema=True),
1633        )
1634
1635    def _parse_cache(self) -> exp.Expression:
1636        lazy = self._match(TokenType.LAZY)
1637        self._match(TokenType.TABLE)
1638        table = self._parse_table(schema=True)
1639        options = []
1640
1641        if self._match(TokenType.OPTIONS):
1642            self._match_l_paren()
1643            k = self._parse_string()
1644            self._match(TokenType.EQ)
1645            v = self._parse_string()
1646            options = [k, v]
1647            self._match_r_paren()
1648
1649        self._match(TokenType.ALIAS)
1650        return self.expression(
1651            exp.Cache,
1652            this=table,
1653            lazy=lazy,
1654            options=options,
1655            expression=self._parse_select(nested=True),
1656        )
1657
1658    def _parse_partition(self) -> t.Optional[exp.Expression]:
1659        if not self._match(TokenType.PARTITION):
1660            return None
1661
1662        return self.expression(
1663            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1664        )
1665
1666    def _parse_value(self) -> exp.Expression:
1667        if self._match(TokenType.L_PAREN):
1668            expressions = self._parse_csv(self._parse_conjunction)
1669            self._match_r_paren()
1670            return self.expression(exp.Tuple, expressions=expressions)
1671
1672        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1673        # Source: https://prestodb.io/docs/current/sql/values.html
1674        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1675
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH + statement, SELECT, a parenthesized
        subquery/table, or VALUES — then any trailing set operations.

        Args:
            nested: whether this select is nested inside parentheses.
            table: whether a bare table reference is allowed inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression, or None when nothing matched.
        """
        cte = self._parse_with()
        if cte:
            # A CTE must be followed by a statement that can carry a "with" arg.
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # reached only when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                # DISTINCT may carry an ON (...) expression list.
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1749
1750    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1751        if not skip_with_token and not self._match(TokenType.WITH):
1752            return None
1753
1754        recursive = self._match(TokenType.RECURSIVE)
1755
1756        expressions = []
1757        while True:
1758            expressions.append(self._parse_cte())
1759
1760            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1761                break
1762            else:
1763                self._match(TokenType.WITH)
1764
1765        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1766
1767    def _parse_cte(self) -> exp.Expression:
1768        alias = self._parse_table_alias()
1769        if not alias or not alias.this:
1770            self.raise_error("Expected CTE to have alias")
1771
1772        self._match(TokenType.ALIAS)
1773
1774        return self.expression(
1775            exp.CTE,
1776            this=self._parse_wrapped(self._parse_statement),
1777            alias=alias,
1778        )
1779
1780    def _parse_table_alias(
1781        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1782    ) -> t.Optional[exp.Expression]:
1783        any_token = self._match(TokenType.ALIAS)
1784        alias = self._parse_id_var(
1785            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1786        )
1787        index = self._index
1788
1789        if self._match(TokenType.L_PAREN):
1790            columns = self._parse_csv(self._parse_function_parameter)
1791            self._match_r_paren() if columns else self._retreat(index)
1792        else:
1793            columns = None
1794
1795        if not alias and not columns:
1796            return None
1797
1798        return self.expression(exp.TableAlias, this=alias, columns=columns)
1799
1800    def _parse_subquery(
1801        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1802    ) -> exp.Expression:
1803        return self.expression(
1804            exp.Subquery,
1805            this=this,
1806            pivots=self._parse_pivots(),
1807            alias=self._parse_table_alias() if parse_alias else None,
1808        )
1809
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Parse trailing query modifiers (laterals, joins, comma-joined tables,
        then WHERE/GROUP BY/etc. via `QUERY_MODIFIER_PARSERS`) and attach them
        to `this` in place. No-op when `this` is not a modifiable node.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        # Bare tables don't take comma-separated table lists.
        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Old-style comma join: append the table to the FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1834
1835    def _parse_hint(self) -> t.Optional[exp.Expression]:
1836        if self._match(TokenType.HINT):
1837            hints = self._parse_csv(self._parse_function)
1838            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1839                self.raise_error("Expected */ after HINT")
1840            return self.expression(exp.Hint, expressions=hints)
1841
1842        return None
1843
1844    def _parse_into(self) -> t.Optional[exp.Expression]:
1845        if not self._match(TokenType.INTO):
1846            return None
1847
1848        temp = self._match(TokenType.TEMPORARY)
1849        unlogged = self._match(TokenType.UNLOGGED)
1850        self._match(TokenType.TABLE)
1851
1852        return self.expression(
1853            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1854        )
1855
1856    def _parse_from(self) -> t.Optional[exp.Expression]:
1857        if not self._match(TokenType.FROM):
1858            return None
1859
1860        return self.expression(
1861            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1862        )
1863
1864    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1865        if not self._match(TokenType.MATCH_RECOGNIZE):
1866            return None
1867        self._match_l_paren()
1868
1869        partition = self._parse_partition_by()
1870        order = self._parse_order()
1871        measures = (
1872            self._parse_alias(self._parse_conjunction())
1873            if self._match_text_seq("MEASURES")
1874            else None
1875        )
1876
1877        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1878            rows = exp.Var(this="ONE ROW PER MATCH")
1879        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1880            text = "ALL ROWS PER MATCH"
1881            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1882                text += f" SHOW EMPTY MATCHES"
1883            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1884                text += f" OMIT EMPTY MATCHES"
1885            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1886                text += f" WITH UNMATCHED ROWS"
1887            rows = exp.Var(this=text)
1888        else:
1889            rows = None
1890
1891        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1892            text = "AFTER MATCH SKIP"
1893            if self._match_text_seq("PAST", "LAST", "ROW"):
1894                text += f" PAST LAST ROW"
1895            elif self._match_text_seq("TO", "NEXT", "ROW"):
1896                text += f" TO NEXT ROW"
1897            elif self._match_text_seq("TO", "FIRST"):
1898                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1899            elif self._match_text_seq("TO", "LAST"):
1900                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1901            after = exp.Var(this=text)
1902        else:
1903            after = None
1904
1905        if self._match_text_seq("PATTERN"):
1906            self._match_l_paren()
1907
1908            if not self._curr:
1909                self.raise_error("Expecting )", self._curr)
1910
1911            paren = 1
1912            start = self._curr
1913
1914            while self._curr and paren > 0:
1915                if self._curr.token_type == TokenType.L_PAREN:
1916                    paren += 1
1917                if self._curr.token_type == TokenType.R_PAREN:
1918                    paren -= 1
1919                end = self._prev
1920                self._advance()
1921            if paren > 0:
1922                self.raise_error("Expecting )", self._curr)
1923            pattern = exp.Var(this=self._find_sql(start, end))
1924        else:
1925            pattern = None
1926
1927        define = (
1928            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1929        )
1930        self._match_r_paren()
1931
1932        return self.expression(
1933            exp.MatchRecognize,
1934            partition_by=partition,
1935            order=order,
1936            measures=measures,
1937            rows=rows,
1938            after=after,
1939            pattern=pattern,
1940            define=define,
1941        )
1942
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs.

        Returns:
            An `exp.Lateral` node, or — for the APPLY forms — an `exp.Join`
            wrapping one; None when no lateral construct is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call
            # or identifier, e.g. LATERAL VIEW explode(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW aliases: `table_name AS col1, col2, ...`
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            # APPLY is modeled as a join; OUTER APPLY maps to a LEFT join side.
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1987
1988    def _parse_join_side_and_kind(
1989        self,
1990    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1991        return (
1992            self._match(TokenType.NATURAL) and self._prev,
1993            self._match_set(self.JOIN_SIDES) and self._prev,
1994            self._match_set(self.JOIN_KINDS) and self._prev,
1995        )
1996
1997    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
1998        natural, side, kind = self._parse_join_side_and_kind()
1999
2000        if not skip_join_token and not self._match(TokenType.JOIN):
2001            return None
2002
2003        kwargs: t.Dict[
2004            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
2005        ] = {"this": self._parse_table()}
2006
2007        if natural:
2008            kwargs["natural"] = True
2009        if side:
2010            kwargs["side"] = side.text
2011        if kind:
2012            kwargs["kind"] = kind.text
2013
2014        if self._match(TokenType.ON):
2015            kwargs["on"] = self._parse_conjunction()
2016        elif self._match(TokenType.USING):
2017            kwargs["using"] = self._parse_wrapped_id_vars()
2018
2019        return self.expression(exp.Join, **kwargs)  # type: ignore
2020
2021    def _parse_index(self) -> exp.Expression:
2022        index = self._parse_id_var()
2023        self._match(TokenType.ON)
2024        self._match(TokenType.TABLE)  # hive
2025
2026        return self.expression(
2027            exp.Index,
2028            this=index,
2029            table=self.expression(exp.Table, this=self._parse_id_var()),
2030            columns=self._parse_expression(),
2031        )
2032
2033    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2034        unique = self._match(TokenType.UNIQUE)
2035        primary = self._match_text_seq("PRIMARY")
2036        amp = self._match_text_seq("AMP")
2037        if not self._match(TokenType.INDEX):
2038            return None
2039        index = self._parse_id_var()
2040        columns = None
2041        if self._match(TokenType.L_PAREN, advance=False):
2042            columns = self._parse_wrapped_csv(self._parse_column)
2043        return self.expression(
2044            exp.Index,
2045            this=index,
2046            columns=columns,
2047            unique=unique,
2048            primary=primary,
2049            amp=amp,
2050        )
2051
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly-qualified table name (catalog.db.table) into `exp.Table`.

        Args:
            schema: when True, don't try to parse the first part as a function call.

        Raises:
            Via `raise_error` when no table name could be parsed.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2072
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral, UNNEST, VALUES, subquery, or a
        plain table reference with optional alias, hints, pivots and sampling.

        Args:
            schema: parse the table reference as a schema (column definitions).
            alias_tokens: token types permitted as the alias identifier.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: the sample clause may appear before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints of the form WITH (hint1, hint2, ...).
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        return this
2127
2128    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2129        if not self._match(TokenType.UNNEST):
2130            return None
2131
2132        expressions = self._parse_wrapped_csv(self._parse_column)
2133        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2134        alias = self._parse_table_alias()
2135
2136        if alias and self.unnest_column_only:
2137            if alias.args.get("columns"):
2138                self.raise_error("Unexpected extra column alias in unnest.")
2139            alias.set("columns", [alias.this])
2140            alias.set("this", None)
2141
2142        offset = None
2143        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2144            self._match(TokenType.ALIAS)
2145            offset = self._parse_conjunction()
2146
2147        return self.expression(
2148            exp.Unnest,
2149            expressions=expressions,
2150            ordinality=ordinality,
2151            alias=alias,
2152            offset=offset,
2153        )
2154
2155    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2156        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2157        if not is_derived and not self._match(TokenType.VALUES):
2158            return None
2159
2160        expressions = self._parse_csv(self._parse_value)
2161
2162        if is_derived:
2163            self._match_r_paren()
2164
2165        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2166
2167    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2168        if not self._match(TokenType.TABLE_SAMPLE) and not (
2169            as_modifier and self._match_text_seq("USING", "SAMPLE")
2170        ):
2171            return None
2172
2173        bucket_numerator = None
2174        bucket_denominator = None
2175        bucket_field = None
2176        percent = None
2177        rows = None
2178        size = None
2179        seed = None
2180
2181        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2182        method = self._parse_var(tokens=(TokenType.ROW,))
2183
2184        self._match(TokenType.L_PAREN)
2185
2186        num = self._parse_number()
2187
2188        if self._match(TokenType.BUCKET):
2189            bucket_numerator = self._parse_number()
2190            self._match(TokenType.OUT_OF)
2191            bucket_denominator = bucket_denominator = self._parse_number()
2192            self._match(TokenType.ON)
2193            bucket_field = self._parse_field()
2194        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2195            percent = num
2196        elif self._match(TokenType.ROWS):
2197            rows = num
2198        else:
2199            size = num
2200
2201        self._match(TokenType.R_PAREN)
2202
2203        if self._match(TokenType.L_PAREN):
2204            method = self._parse_var()
2205            seed = self._match(TokenType.COMMA) and self._parse_number()
2206            self._match_r_paren()
2207        elif self._match_texts(("SEED", "REPEATABLE")):
2208            seed = self._parse_wrapped(self._parse_number)
2209
2210        return self.expression(
2211            exp.TableSample,
2212            method=method,
2213            bucket_numerator=bucket_numerator,
2214            bucket_denominator=bucket_denominator,
2215            bucket_field=bucket_field,
2216            percent=percent,
2217            rows=rows,
2218            size=size,
2219            seed=seed,
2220            kind=kind,
2221        )
2222
2223    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2224        return list(iter(self._parse_pivot, None))
2225
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause into `exp.Pivot`.

        Returns None (after retreating) when the PIVOT/UNPIVOT token isn't
        actually followed by a parenthesized pivot body.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword was something else (e.g. an identifier): undo.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2266
2267    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2268        if not skip_where_token and not self._match(TokenType.WHERE):
2269            return None
2270
2271        return self.expression(
2272            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2273        )
2274
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP and CUBE.

        Args:
            skip_group_by_token: assume GROUP BY was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.DefaultDict[str, t.List[t.Any]] = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE yield True; ROLLUP(...) / CUBE(...) yield columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once a full pass matched nothing.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2306
2307    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2308        if not self._match(TokenType.GROUPING_SETS):
2309            return None
2310
2311        return self._parse_wrapped_csv(self._parse_grouping_set)
2312
2313    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2314        if self._match(TokenType.L_PAREN):
2315            grouping_set = self._parse_csv(self._parse_column)
2316            self._match_r_paren()
2317            return self.expression(exp.Tuple, expressions=grouping_set)
2318
2319        return self._parse_column()
2320
2321    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2322        if not skip_having_token and not self._match(TokenType.HAVING):
2323            return None
2324        return self.expression(exp.Having, this=self._parse_conjunction())
2325
2326    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2327        if not self._match(TokenType.QUALIFY):
2328            return None
2329        return self.expression(exp.Qualify, this=self._parse_conjunction())
2330
2331    def _parse_order(
2332        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2333    ) -> t.Optional[exp.Expression]:
2334        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2335            return this
2336
2337        return self.expression(
2338            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2339        )
2340
2341    def _parse_sort(
2342        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2343    ) -> t.Optional[exp.Expression]:
2344        if not self._match(token_type):
2345            return None
2346        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2347
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST.

        When no explicit null ordering is given, `nulls_first` is derived from
        the dialect's `null_ordering` setting so the normalized tree encodes
        the effective ordering.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer nulls_first from the dialect default only when the query
        # didn't specify NULLS FIRST/LAST explicitly.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2369
2370    def _parse_limit(
2371        self, this: t.Optional[exp.Expression] = None, top: bool = False
2372    ) -> t.Optional[exp.Expression]:
2373        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2374            limit_paren = self._match(TokenType.L_PAREN)
2375            limit_exp = self.expression(
2376                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2377            )
2378
2379            if limit_paren:
2380                self._match_r_paren()
2381
2382            return limit_exp
2383
2384        if self._match(TokenType.FETCH):
2385            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2386            direction = self._prev.text if direction else "FIRST"
2387            count = self._parse_number()
2388            self._match_set((TokenType.ROW, TokenType.ROWS))
2389            self._match(TokenType.ONLY)
2390            return self.expression(exp.Fetch, direction=direction, count=count)
2391
2392        return this
2393
2394    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2395        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2396            return this
2397
2398        count = self._parse_number()
2399        self._match_set((TokenType.ROW, TokenType.ROWS))
2400        return self.expression(exp.Offset, this=this, expression=count)
2401
2402    def _parse_lock(self) -> t.Optional[exp.Expression]:
2403        if self._match_text_seq("FOR", "UPDATE"):
2404            return self.expression(exp.Lock, update=True)
2405        if self._match_text_seq("FOR", "SHARE"):
2406            return self.expression(exp.Lock, update=False)
2407
2408        return None
2409
2410    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2411        if not self._match_set(self.SET_OPERATIONS):
2412            return this
2413
2414        token_type = self._prev.token_type
2415
2416        if token_type == TokenType.UNION:
2417            expression = exp.Union
2418        elif token_type == TokenType.EXCEPT:
2419            expression = exp.Except
2420        else:
2421            expression = exp.Intersect
2422
2423        return self.expression(
2424            expression,
2425            this=this,
2426            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2427            expression=self._parse_set_operations(self._parse_select(nested=True)),
2428        )
2429
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression: a conjunction with an optional alias."""
        return self._parse_alias(self._parse_conjunction())
2432
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse operators from the CONJUNCTION table as a left-associative fold."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2435
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse operators from the EQUALITY table as a left-associative fold."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2438
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse operators from the COMPARISON table as a left-associative fold."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2441
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (registered RANGE_PARSERS) plus IS/ISNULL/NOTNULL."""
        this = self._parse_bitwise()
        # A leading NOT negates the range predicate, e.g. x NOT BETWEEN a AND b.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2464
2465    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2466        negate = self._match(TokenType.NOT)
2467        if self._match(TokenType.DISTINCT_FROM):
2468            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2469            return self.expression(klass, this=this, expression=self._parse_expression())
2470
2471        this = self.expression(
2472            exp.Is,
2473            this=this,
2474            expression=self._parse_null() or self._parse_boolean(),
2475        )
2476        return self.expression(exp.Not, this=this) if negate else this
2477
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of an IN predicate.

        Handles three shapes: IN UNNEST(...), a parenthesized subquery or value
        list, and a bare unparenthesized operand (stored under `field`).
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery goes under `query`; otherwise keep the value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2495
2496    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2497        low = self._parse_bitwise()
2498        self._match(TokenType.AND)
2499        high = self._parse_bitwise()
2500        return self.expression(exp.Between, this=this, low=low, high=high)
2501
2502    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2503        if not self._match(TokenType.ESCAPE):
2504            return this
2505        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2506
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise operators, including << and >>."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            # Shifts arrive as two adjacent < / > tokens, so match them as pairs.
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2529
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse operators from the TERM table as a left-associative fold."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2532
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse operators from the FACTOR table as a left-associative fold."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2535
2536    def _parse_unary(self) -> t.Optional[exp.Expression]:
2537        if self._match_set(self.UNARY_PARSERS):
2538            return self.UNARY_PARSERS[self._prev.token_type](self)
2539        return self._parse_at_time_zone(self._parse_type())
2540
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals, `TYPE 'literal'` casts, a bare type, or a column."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        # Tentatively parse a type; we may need to rewind if it was really a column.
        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' becomes a cast of the literal.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare type name not followed by a literal: reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2558
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into a DataType (or related) expression.

        When `check_func` is set and the type had a parenthesized argument
        list, it is only accepted as a type if a string literal follows;
        otherwise the parser rewinds, since the input may be a function call.
        """
        index = self._index

        # Optional "SYSUDTLIB." qualifier — presumably Teradata's UDT database prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # "TYPE()" with an empty list isn't a type; rewind and give up.
                self._retreat(index)
                return None

            self._match_r_paren()
            # A parenthesized argument list means this might be a function call.
            maybe_func = True

        # Postfix [] array syntax, possibly repeated, e.g. INT[][].
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" without "]" means this wasn't a type after all.
            self._retreat(index)
            return None

        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, optionally
        # followed by a bracketed/parenthesized list of literal values.
        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE modifiers onto a concrete variant.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a concrete timestamp variant is chosen, it can't be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a string literal; its presence keeps this a type
            # (e.g. a `TYPE(...) 'literal'` cast), its absence means rewind.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2672
2673    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2674        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2675            return self._parse_types()
2676
2677        this = self._parse_id_var()
2678        self._match(TokenType.COLON)
2679        data_type = self._parse_types()
2680
2681        if not data_type:
2682            return None
2683        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2684
2685    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2686        if not self._match(TokenType.AT_TIME_ZONE):
2687            return this
2688        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2689
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including :: casts and brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::" must be followed by a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other registered operators take the next token as a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot: name -> table, table -> db, db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2738
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.5`-style number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers written without a leading zero, e.g. ".5".
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Several comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve comments attached to the opening parenthesis.
            if this and comments:
                this.comments = comments

            return this

        return None
2785
    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2788
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers where available.

        Args:
            functions: optional name -> builder mapping overriding `self.FUNCTIONS`.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only functions registered as parenless qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function names become Anonymous nodes.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # Any trailing window specification is handled by _parse_window.
        return self._parse_window(this)
2850
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one function parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())
2853
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list.

        Args:
            kind: unused in this implementation; presumably kept for overrides.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        # Without a parameter list, return just the (dotted) name.
        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
2870
2871    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2872        literal = self._parse_primary()
2873        if literal:
2874            return self.expression(exp.Introducer, this=token.text, expression=literal)
2875
2876        return self.expression(exp.Identifier, this=token.text)
2877
2878    def _parse_national(self, token: Token) -> exp.Expression:
2879        return self.expression(exp.National, this=exp.Literal.string(token.text))
2880
2881    def _parse_session_parameter(self) -> exp.Expression:
2882        kind = None
2883        this = self._parse_id_var() or self._parse_primary()
2884
2885        if this and self._match(TokenType.DOT):
2886            kind = this.name
2887            this = self._parse_var() or self._parse_primary()
2888
2889        return self.expression(exp.SessionParameter, this=this, kind=kind)
2890
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, or fall back to a DISTINCT list / plain expression.

        Also consumes trailing IGNORE/RESPECT NULLS and ORDER/LIMIT modifiers,
        since this is used for function argument lists.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator matched: rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default, so it's consumed without wrapping.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2922
2923    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2924        index = self._index
2925        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2926            self._retreat(index)
2927            return this
2928
2929        args = self._parse_csv(
2930            lambda: self._parse_constraint()
2931            or self._parse_column_def(self._parse_field(any_token=True))
2932        )
2933        self._match_r_paren()
2934        return self.expression(exp.Schema, this=this, expressions=args)
2935
2936    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2937        kind = self._parse_types()
2938
2939        if self._match_text_seq("FOR", "ORDINALITY"):
2940            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2941
2942        constraints = []
2943        while True:
2944            constraint = self._parse_column_constraint()
2945            if not constraint:
2946                break
2947            constraints.append(constraint)
2948
2949        if not kind and not constraints:
2950            return this
2951
2952        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2953
2954    def _parse_auto_increment(self) -> exp.Expression:
2955        start = None
2956        increment = None
2957
2958        if self._match(TokenType.L_PAREN, advance=False):
2959            args = self._parse_wrapped_csv(self._parse_bitwise)
2960            start = seq_get(args, 0)
2961            increment = seq_get(args, 1)
2962        elif self._match_text_seq("START"):
2963            start = self._parse_bitwise()
2964            self._match_text_seq("INCREMENT")
2965            increment = self._parse_bitwise()
2966
2967        if start and increment:
2968            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2969
2970        return exp.AutoIncrementColumnConstraint()
2971
2972    def _parse_compress(self) -> exp.Expression:
2973        if self._match(TokenType.L_PAREN, advance=False):
2974            return self.expression(
2975                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2976            )
2977
2978        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2979
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY with optional options."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            # ALWAYS is assumed; the keyword itself is optional.
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Each sequence option is independently optional.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3006
3007    def _parse_inline(self) -> t.Optional[exp.Expression]:
3008        self._match_text_seq("LENGTH")
3009        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3010
3011    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3012        if self._match_text_seq("NULL"):
3013            return self.expression(exp.NotNullColumnConstraint)
3014        if self._match_text_seq("CASESPECIFIC"):
3015            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3016        return None
3017
3018    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3019        this = self._parse_references()
3020        if this:
3021            return this
3022
3023        if self._match(TokenType.CONSTRAINT):
3024            this = self._parse_id_var()
3025
3026        if self._match_texts(self.CONSTRAINT_PARSERS):
3027            return self.expression(
3028                exp.ColumnConstraint,
3029                this=this,
3030                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3031            )
3032
3033        return this
3034
3035    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3036        if not self._match(TokenType.CONSTRAINT):
3037            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3038
3039        this = self._parse_id_var()
3040        expressions = []
3041
3042        while True:
3043            constraint = self._parse_unnamed_constraint() or self._parse_function()
3044            if not constraint:
3045                break
3046            expressions.append(constraint)
3047
3048        return self.expression(exp.Constraint, this=this, expressions=expressions)
3049
3050    def _parse_unnamed_constraint(
3051        self, constraints: t.Optional[t.Collection[str]] = None
3052    ) -> t.Optional[exp.Expression]:
3053        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3054            return None
3055
3056        constraint = self._prev.text.upper()
3057        if constraint not in self.CONSTRAINT_PARSERS:
3058            self.raise_error(f"No parser found for schema constraint {constraint}.")
3059
3060        return self.CONSTRAINT_PARSERS[constraint](self)
3061
3062    def _parse_unique(self) -> exp.Expression:
3063        if not self._match(TokenType.L_PAREN, advance=False):
3064            return self.expression(exp.UniqueColumnConstraint)
3065        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3066
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings, stopping at
        the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3103
3104    def _parse_references(self) -> t.Optional[exp.Expression]:
3105        if not self._match(TokenType.REFERENCES):
3106            return None
3107
3108        expressions = None
3109        this = self._parse_id_var()
3110
3111        if self._match(TokenType.L_PAREN, advance=False):
3112            expressions = self._parse_wrapped_id_vars()
3113
3114        options = self._parse_key_constraint_options()
3115        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3116
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY definition: columns, REFERENCES target, and ON actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # e.g. ON DELETE CASCADE yields options["delete"] = "CASCADE".
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3142
3143    def _parse_primary_key(self) -> exp.Expression:
3144        desc = (
3145            self._match_set((TokenType.ASC, TokenType.DESC))
3146            and self._prev.token_type == TokenType.DESC
3147        )
3148
3149        if not self._match(TokenType.L_PAREN, advance=False):
3150            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3151
3152        expressions = self._parse_wrapped_id_vars()
3153        options = self._parse_key_constraint_options()
3154        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3155
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Slice with no start, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Chained subscripts, e.g. x[0][1], are handled recursively.
        return self._parse_bracket(this)
3184
3185    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3186        if self._match(TokenType.COLON):
3187            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3188        return this
3189
3190    def _parse_case(self) -> t.Optional[exp.Expression]:
3191        ifs = []
3192        default = None
3193
3194        expression = self._parse_conjunction()
3195
3196        while self._match(TokenType.WHEN):
3197            this = self._parse_conjunction()
3198            self._match(TokenType.THEN)
3199            then = self._parse_conjunction()
3200            ifs.append(self.expression(exp.If, this=this, true=then))
3201
3202        if self._match(TokenType.ELSE):
3203            default = self._parse_conjunction()
3204
3205        if not self._match(TokenType.END):
3206            self.raise_error("Expected END after CASE", self._prev)
3207
3208        return self._parse_window(
3209            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3210        )
3211
3212    def _parse_if(self) -> t.Optional[exp.Expression]:
3213        if self._match(TokenType.L_PAREN):
3214            args = self._parse_csv(self._parse_conjunction)
3215            this = exp.If.from_arg_list(args)
3216            self.validate_expression(this, args)
3217            self._match_r_paren()
3218        else:
3219            condition = self._parse_conjunction()
3220            self._match(TokenType.THEN)
3221            true = self._parse_conjunction()
3222            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3223            self._match(TokenType.END)
3224            this = self.expression(exp.If, this=condition, true=true, false=false)
3225
3226        return self._parse_window(this)
3227
3228    def _parse_extract(self) -> exp.Expression:
3229        this = self._parse_function() or self._parse_var() or self._parse_type()
3230
3231        if self._match(TokenType.FROM):
3232            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3233
3234        if not self._match(TokenType.COMMA):
3235            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3236
3237        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3238
3239    def _parse_cast(self, strict: bool) -> exp.Expression:
3240        this = self._parse_conjunction()
3241
3242        if not self._match(TokenType.ALIAS):
3243            self.raise_error("Expected AS after CAST")
3244
3245        to = self._parse_types()
3246
3247        if not to:
3248            self.raise_error("Expected TYPE after CAST")
3249        elif to.this == exp.DataType.Type.CHAR:
3250            if self._match(TokenType.CHARACTER_SET):
3251                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3252
3253        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3254
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style calls into exp.GroupConcat.

        Handles the Postgres form with a trailing ORDER BY inside the call and
        the WITHIN GROUP (ORDER BY ...) form, normalizing both so the call can
        be transpiled to MySQL / SQLite GROUP_CONCAT.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Save the position so we can rewind if WITHIN GROUP does not follow.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3283
3284    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3285        to: t.Optional[exp.Expression]
3286        this = self._parse_bitwise()
3287
3288        if self._match(TokenType.USING):
3289            to = self.expression(exp.CharacterSet, this=self._parse_var())
3290        elif self._match(TokenType.COMMA):
3291            to = self._parse_bitwise()
3292        else:
3293            to = None
3294
3295        # Swap the argument order if needed to produce the correct AST
3296        if self.CONVERT_TYPE_FIRST:
3297            this, to = to, this
3298
3299        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3300
3301    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3302        args = self._parse_csv(self._parse_bitwise)
3303
3304        if self._match(TokenType.IN):
3305            return self.expression(
3306                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3307            )
3308
3309        if haystack_first:
3310            haystack = seq_get(args, 0)
3311            needle = seq_get(args, 1)
3312        else:
3313            needle = seq_get(args, 0)
3314            haystack = seq_get(args, 1)
3315
3316        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3317
3318        self.validate_expression(this, args)
3319
3320        return this
3321
3322    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3323        args = self._parse_csv(self._parse_table)
3324        return exp.JoinHint(this=func_name.upper(), expressions=args)
3325
3326    def _parse_substring(self) -> exp.Expression:
3327        # Postgres supports the form: substring(string [from int] [for int])
3328        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3329
3330        args = self._parse_csv(self._parse_bitwise)
3331
3332        if self._match(TokenType.FROM):
3333            args.append(self._parse_bitwise())
3334            if self._match(TokenType.FOR):
3335                args.append(self._parse_bitwise())
3336
3337        this = exp.Substring.from_arg_list(args)
3338        self.validate_expression(this, args)
3339
3340        return this
3341
3342    def _parse_trim(self) -> exp.Expression:
3343        # https://www.w3resource.com/sql/character-functions/trim.php
3344        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3345
3346        position = None
3347        collation = None
3348
3349        if self._match_set(self.TRIM_TYPES):
3350            position = self._prev.text.upper()
3351
3352        expression = self._parse_term()
3353        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3354            this = self._parse_term()
3355        else:
3356            this = expression
3357            expression = None
3358
3359        if self._match(TokenType.COLLATE):
3360            collation = self._parse_term()
3361
3362        return self.expression(
3363            exp.Trim,
3364            this=this,
3365            position=position,
3366            expression=expression,
3367            collation=collation,
3368        )
3369
3370    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3371        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3372
3373    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3374        return self._parse_window(self._parse_id_var(), alias=True)
3375
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the trailing clauses that can follow a function call:
        FILTER (...), WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...).

        When `alias` is True this parses a named window (`name AS (...)`)
        instead of requiring OVER.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        # OVER <name> (no parens) references a named window.
        if not self._match(TokenType.L_PAREN):
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3449
3450    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3451        self._match(TokenType.BETWEEN)
3452
3453        return {
3454            "value": (
3455                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3456            )
3457            or self._parse_bitwise(),
3458            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3459        }
3460
3461    def _parse_alias(
3462        self, this: t.Optional[exp.Expression], explicit: bool = False
3463    ) -> t.Optional[exp.Expression]:
3464        any_token = self._match(TokenType.ALIAS)
3465
3466        if explicit and not any_token:
3467            return this
3468
3469        if self._match(TokenType.L_PAREN):
3470            aliases = self.expression(
3471                exp.Aliases,
3472                this=this,
3473                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3474            )
3475            self._match_r_paren(aliases)
3476            return aliases
3477
3478        alias = self._parse_id_var(any_token)
3479
3480        if alias:
3481            return self.expression(exp.Alias, this=this, alias=alias)
3482
3483        return this
3484
3485    def _parse_id_var(
3486        self,
3487        any_token: bool = True,
3488        tokens: t.Optional[t.Collection[TokenType]] = None,
3489        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3490    ) -> t.Optional[exp.Expression]:
3491        identifier = self._parse_identifier()
3492
3493        if identifier:
3494            return identifier
3495
3496        prefix = ""
3497
3498        if prefix_tokens:
3499            while self._match_set(prefix_tokens):
3500                prefix += self._prev.text
3501
3502        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3503            quoted = self._prev.token_type == TokenType.STRING
3504            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3505
3506        return None
3507
3508    def _parse_string(self) -> t.Optional[exp.Expression]:
3509        if self._match(TokenType.STRING):
3510            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3511        return self._parse_placeholder()
3512
3513    def _parse_number(self) -> t.Optional[exp.Expression]:
3514        if self._match(TokenType.NUMBER):
3515            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3516        return self._parse_placeholder()
3517
3518    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3519        if self._match(TokenType.IDENTIFIER):
3520            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3521        return self._parse_placeholder()
3522
3523    def _parse_var(
3524        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3525    ) -> t.Optional[exp.Expression]:
3526        if (
3527            (any_token and self._advance_any())
3528            or self._match(TokenType.VAR)
3529            or (self._match_set(tokens) if tokens else False)
3530        ):
3531            return self.expression(exp.Var, this=self._prev.text)
3532        return self._parse_placeholder()
3533
3534    def _advance_any(self) -> t.Optional[Token]:
3535        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3536            self._advance()
3537            return self._prev
3538        return None
3539
3540    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3541        return self._parse_var() or self._parse_string()
3542
3543    def _parse_null(self) -> t.Optional[exp.Expression]:
3544        if self._match(TokenType.NULL):
3545            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3546        return None
3547
3548    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3549        if self._match(TokenType.TRUE):
3550            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3551        if self._match(TokenType.FALSE):
3552            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3553        return None
3554
3555    def _parse_star(self) -> t.Optional[exp.Expression]:
3556        if self._match(TokenType.STAR):
3557            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3558        return None
3559
3560    def _parse_parameter(self) -> exp.Expression:
3561        wrapped = self._match(TokenType.L_BRACE)
3562        this = self._parse_var() or self._parse_primary()
3563        self._match(TokenType.R_BRACE)
3564        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3565
3566    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3567        if self._match_set(self.PLACEHOLDER_PARSERS):
3568            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3569            if placeholder:
3570                return placeholder
3571            self._advance(-1)
3572        return None
3573
3574    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3575        if not self._match(TokenType.EXCEPT):
3576            return None
3577        if self._match(TokenType.L_PAREN, advance=False):
3578            return self._parse_wrapped_csv(self._parse_column)
3579        return self._parse_csv(self._parse_column)
3580
3581    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3582        if not self._match(TokenType.REPLACE):
3583            return None
3584        if self._match(TokenType.L_PAREN, advance=False):
3585            return self._parse_wrapped_csv(self._parse_expression)
3586        return self._parse_csv(self._parse_expression)
3587
3588    def _parse_csv(
3589        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3590    ) -> t.List[t.Optional[exp.Expression]]:
3591        parse_result = parse_method()
3592        items = [parse_result] if parse_result is not None else []
3593
3594        while self._match(sep):
3595            if parse_result and self._prev_comments:
3596                parse_result.comments = self._prev_comments
3597
3598            parse_result = parse_method()
3599            if parse_result is not None:
3600                items.append(parse_result)
3601
3602        return items
3603
3604    def _parse_tokens(
3605        self, parse_method: t.Callable, expressions: t.Dict
3606    ) -> t.Optional[exp.Expression]:
3607        this = parse_method()
3608
3609        while self._match_set(expressions):
3610            this = self.expression(
3611                expressions[self._prev.token_type],
3612                this=this,
3613                comments=self._prev_comments,
3614                expression=parse_method(),
3615            )
3616
3617        return this
3618
3619    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3620        return self._parse_wrapped_csv(self._parse_id_var)
3621
3622    def _parse_wrapped_csv(
3623        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3624    ) -> t.List[t.Optional[exp.Expression]]:
3625        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3626
3627    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3628        self._match_l_paren()
3629        parse_result = parse_method()
3630        self._match_r_paren()
3631        return parse_result
3632
3633    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3634        return self._parse_select() or self._parse_expression()
3635
3636    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3637        return self._parse_set_operations(
3638            self._parse_select(nested=True, parse_subquery_alias=False)
3639        )
3640
3641    def _parse_transaction(self) -> exp.Expression:
3642        this = None
3643        if self._match_texts(self.TRANSACTION_KIND):
3644            this = self._prev.text
3645
3646        self._match_texts({"TRANSACTION", "WORK"})
3647
3648        modes = []
3649        while True:
3650            mode = []
3651            while self._match(TokenType.VAR):
3652                mode.append(self._prev.text)
3653
3654            if mode:
3655                modes.append(" ".join(mode))
3656            if not self._match(TokenType.COMMA):
3657                break
3658
3659        return self.expression(exp.Transaction, this=this, modes=modes)
3660
3661    def _parse_commit_or_rollback(self) -> exp.Expression:
3662        chain = None
3663        savepoint = None
3664        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3665
3666        self._match_texts({"TRANSACTION", "WORK"})
3667
3668        if self._match_text_seq("TO"):
3669            self._match_text_seq("SAVEPOINT")
3670            savepoint = self._parse_id_var()
3671
3672        if self._match(TokenType.AND):
3673            chain = not self._match_text_seq("NO")
3674            self._match_text_seq("CHAIN")
3675
3676        if is_rollback:
3677            return self.expression(exp.Rollback, savepoint=savepoint)
3678        return self.expression(exp.Commit, chain=chain)
3679
3680    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3681        if not self._match_text_seq("ADD"):
3682            return None
3683
3684        self._match(TokenType.COLUMN)
3685        exists_column = self._parse_exists(not_=True)
3686        expression = self._parse_column_def(self._parse_field(any_token=True))
3687
3688        if expression:
3689            expression.set("exists", exists_column)
3690
3691        return expression
3692
3693    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3694        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3695
3696    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3697    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3698        return self.expression(
3699            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3700        )
3701
3702    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3703        this = None
3704        kind = self._prev.token_type
3705
3706        if kind == TokenType.CONSTRAINT:
3707            this = self._parse_id_var()
3708
3709            if self._match_text_seq("CHECK"):
3710                expression = self._parse_wrapped(self._parse_conjunction)
3711                enforced = self._match_text_seq("ENFORCED")
3712
3713                return self.expression(
3714                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3715                )
3716
3717        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3718            expression = self._parse_foreign_key()
3719        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3720            expression = self._parse_primary_key()
3721
3722        return self.expression(exp.AddConstraint, this=this, expression=expression)
3723
3724    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
3725        index = self._index - 1
3726
3727        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
3728            return self._parse_csv(self._parse_add_constraint)
3729
3730        self._retreat(index)
3731        return self._parse_csv(self._parse_add_column)
3732
3733    def _parse_alter_table_alter(self) -> exp.Expression:
3734        self._match(TokenType.COLUMN)
3735        column = self._parse_field(any_token=True)
3736
3737        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
3738            return self.expression(exp.AlterColumn, this=column, drop=True)
3739        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
3740            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
3741
3742        self._match_text_seq("SET", "DATA")
3743        return self.expression(
3744            exp.AlterColumn,
3745            this=column,
3746            dtype=self._match_text_seq("TYPE") and self._parse_types(),
3747            collate=self._match(TokenType.COLLATE) and self._parse_term(),
3748            using=self._match(TokenType.USING) and self._parse_conjunction(),
3749        )
3750
3751    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
3752        index = self._index - 1
3753
3754        partition_exists = self._parse_exists()
3755        if self._match(TokenType.PARTITION, advance=False):
3756            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
3757
3758        self._retreat(index)
3759        return self._parse_csv(self._parse_drop_column)
3760
3761    def _parse_alter_table_rename(self) -> exp.Expression:
3762        self._match_text_seq("TO")
3763        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
3764
3765    def _parse_alter(self) -> t.Optional[exp.Expression]:
3766        start = self._prev
3767
3768        if not self._match(TokenType.TABLE):
3769            return self._parse_as_command(start)
3770
3771        exists = self._parse_exists()
3772        this = self._parse_table(schema=True)
3773
3774        if self._next:
3775            self._advance()
3776        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
3777
3778        if parser:
3779            return self.expression(
3780                exp.AlterTable,
3781                this=this,
3782                exists=exists,
3783                actions=ensure_list(parser(self)),
3784            )
3785        return self._parse_as_command(start)
3786
3787    def _parse_show(self) -> t.Optional[exp.Expression]:
3788        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3789        if parser:
3790            return parser(self)
3791        self._advance()
3792        return self.expression(exp.Show, this=self._prev.text.upper())
3793
3794    def _default_parse_set_item(self) -> exp.Expression:
3795        return self.expression(
3796            exp.SetItem,
3797            this=self._parse_statement(),
3798        )
3799
3800    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3801        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3802        return parser(self) if parser else self._default_parse_set_item()
3803
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: target, USING source, ON condition and WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # Distinguish [NOT] MATCHED BY TARGET from BY SOURCE.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            # Optional extra predicate: WHEN ... AND <condition>.
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # THEN INSERT * form.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    # THEN INSERT (cols) VALUES (...) form.
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # THEN UPDATE * form.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # THEN UPDATE SET col = expr, ... form.
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
3869
3870    def _parse_set(self) -> exp.Expression:
3871        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3872
3873    def _parse_as_command(self, start: Token) -> exp.Command:
3874        while self._curr:
3875            self._advance()
3876        text = self._find_sql(start, self._prev)
3877        size = len(start.text)
3878        return exp.Command(this=text[:size], expression=text[size:])
3879
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming token texts to find a (multi-word) parser key.

        Returns the matching callable from `parsers`, or None with the token
        position restored when no key matches.
        """
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no parser key begins with this sequence.
                break
            if result == 2:
                # Exact key match: look up the corresponding parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
3899
3900    def _match(self, token_type, advance=True):
3901        if not self._curr:
3902            return None
3903
3904        if self._curr.token_type == token_type:
3905            if advance:
3906                self._advance()
3907            return True
3908
3909        return None
3910
3911    def _match_set(self, types, advance=True):
3912        if not self._curr:
3913            return None
3914
3915        if self._curr.token_type in types:
3916            if advance:
3917                self._advance()
3918            return True
3919
3920        return None
3921
3922    def _match_pair(self, token_type_a, token_type_b, advance=True):
3923        if not self._curr or not self._next:
3924            return None
3925
3926        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3927            if advance:
3928                self._advance(2)
3929            return True
3930
3931        return None
3932
3933    def _match_l_paren(self, expression=None):
3934        if not self._match(TokenType.L_PAREN):
3935            self.raise_error("Expecting (")
3936        if expression and self._prev_comments:
3937            expression.comments = self._prev_comments
3938
3939    def _match_r_paren(self, expression=None):
3940        if not self._match(TokenType.R_PAREN):
3941            self.raise_error("Expecting )")
3942        if expression and self._prev_comments:
3943            expression.comments = self._prev_comments
3944
3945    def _match_texts(self, texts, advance=True):
3946        if self._curr and self._curr.text.upper() in texts:
3947            if advance:
3948                self._advance()
3949            return True
3950        return False
3951
3952    def _match_text_seq(self, *texts, advance=True):
3953        index = self._index
3954        for text in texts:
3955            if self._curr and self._curr.text.upper() == text:
3956                self._advance()
3957            else:
3958                self._retreat(index)
3959                return False
3960
3961        if not advance:
3962            self._retreat(index)
3963
3964        return True
3965
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        A table-qualified column becomes Dot(table, column); unqualified
        columns and identifiers become plain Var nodes.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
3980
3981    def _replace_lambda(self, node, lambda_variables):
3982        if isinstance(node, exp.Column):
3983            if node.name in lambda_variables:
3984                return node.this
3985        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store parser configuration and initialize mutable parse state."""
        # Default to ErrorLevel.IMMEDIATE when no level is given.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
def reset(self):
749    def reset(self):
750        self.sql = ""
751        self.errors = []
752        self._tokens = []
753        self._index = 0
754        self._curr = None
755        self._next = None
756        self._prev = None
757        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
759    def parse(
760        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
761    ) -> t.List[t.Optional[exp.Expression]]:
762        """
763        Parses a list of tokens and returns a list of syntax trees, one tree
764        per parsed SQL statement.
765
766        Args:
767            raw_tokens: the list of tokens.
768            sql: the original SQL string, used to produce helpful debug messages.
769
770        Returns:
771            The list of syntax trees.
772        """
773        return self._parse(
774            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
775        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
777    def parse_into(
778        self,
779        expression_types: exp.IntoType,
780        raw_tokens: t.List[Token],
781        sql: t.Optional[str] = None,
782    ) -> t.List[t.Optional[exp.Expression]]:
783        """
784        Parses a list of tokens into a given Expression type. If a collection of Expression
785        types is given instead, this method will try to parse the token list into each one
786        of them, stopping at the first for which the parsing succeeds.
787
788        Args:
789            expression_types: the expression type(s) to try and parse the token list into.
790            raw_tokens: the list of tokens.
791            sql: the original SQL string, used to produce helpful debug messages.
792
793        Returns:
794            The target Expression.
795        """
796        errors = []
797        for expression_type in ensure_collection(expression_types):
798            parser = self.EXPRESSION_PARSERS.get(expression_type)
799            if not parser:
800                raise TypeError(f"No parser registered for {expression_type}")
801            try:
802                return self._parse(parser, raw_tokens, sql)
803            except ParseError as e:
804                e.errors[0]["into_expression"] = expression_type
805                errors.append(e)
806        raise ParseError(
807            f"Failed to parse into {expression_types}",
808            errors=merge_errors(errors),
809        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
845    def check_errors(self) -> None:
846        """
847        Logs or raises any found errors, depending on the chosen error level setting.
848        """
849        if self.error_level == ErrorLevel.WARN:
850            for error in self.errors:
851                logger.error(str(error))
852        elif self.error_level == ErrorLevel.RAISE and self.errors:
853            raise ParseError(
854                concat_messages(self.errors, self.max_errors),
855                errors=merge_errors(self.errors),
856            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
858    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
859        """
860        Appends an error in the list of recorded errors or raises it, depending on the chosen
861        error level setting.
862        """
863        token = token or self._curr or self._prev or Token.string("")
864        start = self._find_token(token)
865        end = start + len(token.text)
866        start_context = self.sql[max(start - self.error_message_context, 0) : start]
867        highlight = self.sql[start:end]
868        end_context = self.sql[end : end + self.error_message_context]
869
870        error = ParseError.new(
871            f"{message}. Line {token.line}, Col: {token.col}.\n"
872            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
873            description=message,
874            line=token.line,
875            col=token.col,
876            start_context=start_context,
877            highlight=highlight,
878            end_context=end_context,
879        )
880
881        if self.error_level == ErrorLevel.IMMEDIATE:
882            raise error
883
884        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[sqlglot.expressions.Expression], comments: Optional[List[str]] = None, **kwargs) -> sqlglot.expressions.Expression:
886    def expression(
887        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
888    ) -> exp.Expression:
889        """
890        Creates a new, validated Expression.
891
892        Args:
893            exp_class: the expression class to instantiate.
894            comments: an optional list of comments to attach to the expression.
895            kwargs: the arguments to set for the expression along with their respective values.
896
897        Returns:
898            The target expression.
899        """
900        instance = exp_class(**kwargs)
901        if self._prev_comments:
902            instance.comments = self._prev_comments
903            self._prev_comments = None
904        if comments:
905            instance.comments = comments
906        self.validate_expression(instance)
907        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
909    def validate_expression(
910        self, expression: exp.Expression, args: t.Optional[t.List] = None
911    ) -> None:
912        """
913        Validates an already instantiated expression, making sure that all its mandatory arguments
914        are set.
915
916        Args:
917            expression: the expression to validate.
918            args: an optional list of items that was used to instantiate the expression, if it's a Func.
919        """
920        if self.error_level == ErrorLevel.IGNORE:
921            return
922
923        for error_message in expression.error_messages(args):
924            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.