Edit on GitHub

sqlglot.parser — module source listing

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
# Names imported under this guard are needed only for type annotations;
# the TYPE_CHECKING gate avoids importing them at runtime.
if t.TYPE_CHECKING:
    from sqlglot._typing import E

# Module-level logger shared by all parser diagnostics.
logger = logging.getLogger("sqlglot")
  17
  18
  19def parse_var_map(args: t.List) -> exp.Expression:
  20    if len(args) == 1 and args[0].is_star:
  21        return exp.StarMap(this=args[0])
  22
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34def parse_like(args: t.List) -> exp.Expression:
  35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  37
  38
  39def binary_range_parser(
  40    expr_type: t.Type[exp.Expression],
  41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  42    return lambda self, this: self._parse_escape(
  43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  44    )
  45
  46
  47class _Parser(type):
  48    def __new__(cls, clsname, bases, attrs):
  49        klass = super().__new__(cls, clsname, bases, attrs)
  50        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  51        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  52
  53        return klass
  54
  55
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        unnest_column_only: dialect-specific flag stored for unnest handling
            (consumed by parsing helpers outside this listing).
            Default: False
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  78
    # Maps an SQL function name to a callable that builds the corresponding
    # expression from the list of parsed arguments. The unpacked first entry
    # auto-registers every function known to `sqlglot.exp`; the explicit
    # entries below override or extend that registry.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Note: GLOB swaps its first two arguments, mirroring parse_like.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }
 102
    # Functions that may appear without parentheses, keyed by their token.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (not a
        # CurrentDatetime node) — confirm this is intentional.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Join hints recognized by the dialect; empty in the base parser.
    JOIN_HINTS: t.Set[str] = set()

    # Type tokens that can wrap other types (e.g. ARRAY<INT>).
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
    # Tokens that may prefix a subquery to form a predicate; ANY and SOME are
    # synonyms and build the same expression node.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # All single-character tokens plus SELECT are reserved.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds that live directly inside a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Superset of DB_CREATABLES with the remaining creatable object kinds.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
 205
 206    ID_VAR_TOKENS = {
 207        TokenType.VAR,
 208        TokenType.ANTI,
 209        TokenType.APPLY,
 210        TokenType.ASC,
 211        TokenType.AUTO_INCREMENT,
 212        TokenType.BEGIN,
 213        TokenType.CACHE,
 214        TokenType.COLLATE,
 215        TokenType.COMMAND,
 216        TokenType.COMMENT,
 217        TokenType.COMMIT,
 218        TokenType.CONSTRAINT,
 219        TokenType.DEFAULT,
 220        TokenType.DELETE,
 221        TokenType.DESC,
 222        TokenType.DESCRIBE,
 223        TokenType.DIV,
 224        TokenType.END,
 225        TokenType.EXECUTE,
 226        TokenType.ESCAPE,
 227        TokenType.FALSE,
 228        TokenType.FIRST,
 229        TokenType.FILTER,
 230        TokenType.FORMAT,
 231        TokenType.FULL,
 232        TokenType.IF,
 233        TokenType.IS,
 234        TokenType.ISNULL,
 235        TokenType.INTERVAL,
 236        TokenType.KEEP,
 237        TokenType.LEFT,
 238        TokenType.LOAD,
 239        TokenType.MERGE,
 240        TokenType.NATURAL,
 241        TokenType.NEXT,
 242        TokenType.OFFSET,
 243        TokenType.ORDINALITY,
 244        TokenType.OVERWRITE,
 245        TokenType.PARTITION,
 246        TokenType.PERCENT,
 247        TokenType.PIVOT,
 248        TokenType.PRAGMA,
 249        TokenType.RANGE,
 250        TokenType.REFERENCES,
 251        TokenType.RIGHT,
 252        TokenType.ROW,
 253        TokenType.ROWS,
 254        TokenType.SEMI,
 255        TokenType.SET,
 256        TokenType.SETTINGS,
 257        TokenType.SHOW,
 258        TokenType.TEMPORARY,
 259        TokenType.TOP,
 260        TokenType.TRUE,
 261        TokenType.UNIQUE,
 262        TokenType.UNPIVOT,
 263        TokenType.VOLATILE,
 264        TokenType.WINDOW,
 265        *CREATABLES,
 266        *SUBQUERY_PREDICATES,
 267        *TYPE_TOKENS,
 268        *NO_PAREN_FUNCTIONS,
 269    }
 270
    # Identifier tokens usable inside INTERVAL expressions: everything but END
    # (presumably to avoid clashing with BEGIN ... END — confirm).
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens that may serve as a table alias; keywords that start a clause of
    # their own (join sides, OFFSET, WINDOW, ...) are carved out.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # COMMENT parsing additionally reserves IS.
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # UPDATE parsing additionally reserves SET.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Modifiers accepted by TRIM([LEADING|TRAILING|BOTH] ... FROM ...).
    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 289
 290    FUNC_TOKENS = {
 291        TokenType.COMMAND,
 292        TokenType.CURRENT_DATE,
 293        TokenType.CURRENT_DATETIME,
 294        TokenType.CURRENT_TIMESTAMP,
 295        TokenType.CURRENT_TIME,
 296        TokenType.CURRENT_USER,
 297        TokenType.FILTER,
 298        TokenType.FIRST,
 299        TokenType.FORMAT,
 300        TokenType.GLOB,
 301        TokenType.IDENTIFIER,
 302        TokenType.INDEX,
 303        TokenType.ISNULL,
 304        TokenType.ILIKE,
 305        TokenType.LIKE,
 306        TokenType.MERGE,
 307        TokenType.OFFSET,
 308        TokenType.PRIMARY_KEY,
 309        TokenType.RANGE,
 310        TokenType.REPLACE,
 311        TokenType.ROW,
 312        TokenType.UNNEST,
 313        TokenType.VAR,
 314        TokenType.LEFT,
 315        TokenType.RIGHT,
 316        TokenType.DATE,
 317        TokenType.DATETIME,
 318        TokenType.TABLE,
 319        TokenType.TIMESTAMP,
 320        TokenType.TIMESTAMPTZ,
 321        TokenType.WINDOW,
 322        *TYPE_TOKENS,
 323        *SUBQUERY_PREDICATES,
 324    }
 325
 326    CONJUNCTION = {
 327        TokenType.AND: exp.And,
 328        TokenType.OR: exp.Or,
 329    }
 330
 331    EQUALITY = {
 332        TokenType.EQ: exp.EQ,
 333        TokenType.NEQ: exp.NEQ,
 334        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 335    }
 336
 337    COMPARISON = {
 338        TokenType.GT: exp.GT,
 339        TokenType.GTE: exp.GTE,
 340        TokenType.LT: exp.LT,
 341        TokenType.LTE: exp.LTE,
 342    }
 343
 344    BITWISE = {
 345        TokenType.AMP: exp.BitwiseAnd,
 346        TokenType.CARET: exp.BitwiseXor,
 347        TokenType.PIPE: exp.BitwiseOr,
 348        TokenType.DPIPE: exp.DPipe,
 349    }
 350
 351    TERM = {
 352        TokenType.DASH: exp.Sub,
 353        TokenType.PLUS: exp.Add,
 354        TokenType.MOD: exp.Mod,
 355        TokenType.COLLATE: exp.Collate,
 356    }
 357
 358    FACTOR = {
 359        TokenType.DIV: exp.IntDiv,
 360        TokenType.LR_ARROW: exp.Distance,
 361        TokenType.SLASH: exp.Div,
 362        TokenType.STAR: exp.Mul,
 363    }
 364
 365    TIMESTAMPS = {
 366        TokenType.TIME,
 367        TokenType.TIMESTAMP,
 368        TokenType.TIMESTAMPTZ,
 369        TokenType.TIMESTAMPLTZ,
 370    }
 371
 372    SET_OPERATIONS = {
 373        TokenType.UNION,
 374        TokenType.INTERSECT,
 375        TokenType.EXCEPT,
 376    }
 377
 378    JOIN_SIDES = {
 379        TokenType.LEFT,
 380        TokenType.RIGHT,
 381        TokenType.FULL,
 382    }
 383
 384    JOIN_KINDS = {
 385        TokenType.INNER,
 386        TokenType.OUTER,
 387        TokenType.CROSS,
 388        TokenType.SEMI,
 389        TokenType.ANTI,
 390    }
 391
 392    LAMBDAS = {
 393        TokenType.ARROW: lambda self, expressions: self.expression(
 394            exp.Lambda,
 395            this=self._replace_lambda(
 396                self._parse_conjunction(),
 397                {node.name for node in expressions},
 398            ),
 399            expressions=expressions,
 400        ),
 401        TokenType.FARROW: lambda self, expressions: self.expression(
 402            exp.Kwarg,
 403            this=exp.Var(this=expressions[0].name),
 404            expression=self._parse_conjunction(),
 405        ),
 406    }
 407
 408    COLUMN_OPERATORS = {
 409        TokenType.DOT: None,
 410        TokenType.DCOLON: lambda self, this, to: self.expression(
 411            exp.Cast if self.STRICT_CAST else exp.TryCast,
 412            this=this,
 413            to=to,
 414        ),
 415        TokenType.ARROW: lambda self, this, path: self.expression(
 416            exp.JSONExtract,
 417            this=this,
 418            expression=path,
 419        ),
 420        TokenType.DARROW: lambda self, this, path: self.expression(
 421            exp.JSONExtractScalar,
 422            this=this,
 423            expression=path,
 424        ),
 425        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 426            exp.JSONBExtract,
 427            this=this,
 428            expression=path,
 429        ),
 430        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 431            exp.JSONBExtractScalar,
 432            this=this,
 433            expression=path,
 434        ),
 435        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 436            exp.JSONBContains,
 437            this=this,
 438            expression=key,
 439        ),
 440    }
 441
 442    EXPRESSION_PARSERS = {
 443        exp.Column: lambda self: self._parse_column(),
 444        exp.DataType: lambda self: self._parse_types(),
 445        exp.From: lambda self: self._parse_from(),
 446        exp.Group: lambda self: self._parse_group(),
 447        exp.Identifier: lambda self: self._parse_id_var(),
 448        exp.Lateral: lambda self: self._parse_lateral(),
 449        exp.Join: lambda self: self._parse_join(),
 450        exp.Order: lambda self: self._parse_order(),
 451        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 452        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 453        exp.Lambda: lambda self: self._parse_lambda(),
 454        exp.Limit: lambda self: self._parse_limit(),
 455        exp.Offset: lambda self: self._parse_offset(),
 456        exp.TableAlias: lambda self: self._parse_table_alias(),
 457        exp.Table: lambda self: self._parse_table_parts(),
 458        exp.Condition: lambda self: self._parse_conjunction(),
 459        exp.Expression: lambda self: self._parse_statement(),
 460        exp.Properties: lambda self: self._parse_properties(),
 461        exp.Where: lambda self: self._parse_where(),
 462        exp.Ordered: lambda self: self._parse_ordered(),
 463        exp.Having: lambda self: self._parse_having(),
 464        exp.With: lambda self: self._parse_with(),
 465        exp.Window: lambda self: self._parse_named_window(),
 466        exp.Qualify: lambda self: self._parse_qualify(),
 467        exp.Returning: lambda self: self._parse_returning(),
 468        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 469    }
 470
 471    STATEMENT_PARSERS = {
 472        TokenType.ALTER: lambda self: self._parse_alter(),
 473        TokenType.BEGIN: lambda self: self._parse_transaction(),
 474        TokenType.CACHE: lambda self: self._parse_cache(),
 475        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 476        TokenType.COMMENT: lambda self: self._parse_comment(),
 477        TokenType.CREATE: lambda self: self._parse_create(),
 478        TokenType.DELETE: lambda self: self._parse_delete(),
 479        TokenType.DESC: lambda self: self._parse_describe(),
 480        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 481        TokenType.DROP: lambda self: self._parse_drop(),
 482        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 483        TokenType.INSERT: lambda self: self._parse_insert(),
 484        TokenType.LOAD: lambda self: self._parse_load(),
 485        TokenType.MERGE: lambda self: self._parse_merge(),
 486        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 487        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 488        TokenType.SET: lambda self: self._parse_set(),
 489        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 490        TokenType.UPDATE: lambda self: self._parse_update(),
 491        TokenType.USE: lambda self: self.expression(
 492            exp.Use,
 493            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 494            and exp.Var(this=self._prev.text),
 495            this=self._parse_table(schema=False),
 496        ),
 497    }
 498
 499    UNARY_PARSERS = {
 500        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 501        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 502        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 503        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 504    }
 505
 506    PRIMARY_PARSERS = {
 507        TokenType.STRING: lambda self, token: self.expression(
 508            exp.Literal, this=token.text, is_string=True
 509        ),
 510        TokenType.NUMBER: lambda self, token: self.expression(
 511            exp.Literal, this=token.text, is_string=False
 512        ),
 513        TokenType.STAR: lambda self, _: self.expression(
 514            exp.Star,
 515            **{"except": self._parse_except(), "replace": self._parse_replace()},
 516        ),
 517        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 518        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 519        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 520        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 521        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 522        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 523        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 524        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 525            exp.National, this=token.text
 526        ),
 527        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 528        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 529    }
 530
 531    PLACEHOLDER_PARSERS = {
 532        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 533        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 534        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 535        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 536        else None,
 537    }
 538
 539    RANGE_PARSERS = {
 540        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 541        TokenType.GLOB: binary_range_parser(exp.Glob),
 542        TokenType.ILIKE: binary_range_parser(exp.ILike),
 543        TokenType.IN: lambda self, this: self._parse_in(this),
 544        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 545        TokenType.IS: lambda self, this: self._parse_is(this),
 546        TokenType.LIKE: binary_range_parser(exp.Like),
 547        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 548        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 549        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 550    }
 551
 552    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 553        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 554        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 555        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 556        "CHARACTER SET": lambda self: self._parse_character_set(),
 557        "CHECKSUM": lambda self: self._parse_checksum(),
 558        "CLUSTER": lambda self: self._parse_cluster(),
 559        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 560        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 561        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 562        "DEFINER": lambda self: self._parse_definer(),
 563        "DETERMINISTIC": lambda self: self.expression(
 564            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 565        ),
 566        "DISTKEY": lambda self: self._parse_distkey(),
 567        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 568        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 569        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 570        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 571        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 572        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 573        "FREESPACE": lambda self: self._parse_freespace(),
 574        "IMMUTABLE": lambda self: self.expression(
 575            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 576        ),
 577        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 578        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 579        "LIKE": lambda self: self._parse_create_like(),
 580        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 581        "LOCK": lambda self: self._parse_locking(),
 582        "LOCKING": lambda self: self._parse_locking(),
 583        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 584        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 585        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 586        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 587        "NO": lambda self: self._parse_no_property(),
 588        "ON": lambda self: self._parse_on_property(),
 589        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 590        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 591        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 592        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 593        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 594        "RETURNS": lambda self: self._parse_returns(),
 595        "ROW": lambda self: self._parse_row(),
 596        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 597        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 598        "SETTINGS": lambda self: self.expression(
 599            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 600        ),
 601        "SORTKEY": lambda self: self._parse_sortkey(),
 602        "STABLE": lambda self: self.expression(
 603            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 604        ),
 605        "STORED": lambda self: self._parse_stored(),
 606        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 607        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 608        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 609        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 610        "TTL": lambda self: self._parse_ttl(),
 611        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 612        "VOLATILE": lambda self: self._parse_volatile_property(),
 613        "WITH": lambda self: self._parse_with_property(),
 614    }
 615
 616    CONSTRAINT_PARSERS = {
 617        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 618        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 619        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 620        "CHARACTER SET": lambda self: self.expression(
 621            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 622        ),
 623        "CHECK": lambda self: self.expression(
 624            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 625        ),
 626        "COLLATE": lambda self: self.expression(
 627            exp.CollateColumnConstraint, this=self._parse_var()
 628        ),
 629        "COMMENT": lambda self: self.expression(
 630            exp.CommentColumnConstraint, this=self._parse_string()
 631        ),
 632        "COMPRESS": lambda self: self._parse_compress(),
 633        "DEFAULT": lambda self: self.expression(
 634            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 635        ),
 636        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 637        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 638        "FORMAT": lambda self: self.expression(
 639            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 640        ),
 641        "GENERATED": lambda self: self._parse_generated_as_identity(),
 642        "IDENTITY": lambda self: self._parse_auto_increment(),
 643        "INLINE": lambda self: self._parse_inline(),
 644        "LIKE": lambda self: self._parse_create_like(),
 645        "NOT": lambda self: self._parse_not_constraint(),
 646        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 647        "ON": lambda self: self._match(TokenType.UPDATE)
 648        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 649        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 650        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 651        "REFERENCES": lambda self: self._parse_references(match=False),
 652        "TITLE": lambda self: self.expression(
 653            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 654        ),
 655        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 656        "UNIQUE": lambda self: self._parse_unique(),
 657        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 658    }
 659
 660    ALTER_PARSERS = {
 661        "ADD": lambda self: self._parse_alter_table_add(),
 662        "ALTER": lambda self: self._parse_alter_table_alter(),
 663        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 664        "DROP": lambda self: self._parse_alter_table_drop(),
 665        "RENAME": lambda self: self._parse_alter_table_rename(),
 666    }
 667
 668    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 669
 670    NO_PAREN_FUNCTION_PARSERS = {
 671        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 672        TokenType.CASE: lambda self: self._parse_case(),
 673        TokenType.IF: lambda self: self._parse_if(),
 674        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 675            exp.NextValueFor,
 676            this=self._parse_column(),
 677            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 678        ),
 679    }
 680
 681    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 682
 683    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 684        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 685        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 686        "DECODE": lambda self: self._parse_decode(),
 687        "EXTRACT": lambda self: self._parse_extract(),
 688        "JSON_OBJECT": lambda self: self._parse_json_object(),
 689        "LOG": lambda self: self._parse_logarithm(),
 690        "MATCH": lambda self: self._parse_match_against(),
 691        "OPENJSON": lambda self: self._parse_open_json(),
 692        "POSITION": lambda self: self._parse_position(),
 693        "SAFE_CAST": lambda self: self._parse_cast(False),
 694        "STRING_AGG": lambda self: self._parse_string_agg(),
 695        "SUBSTRING": lambda self: self._parse_substring(),
 696        "TRIM": lambda self: self._parse_trim(),
 697        "TRY_CAST": lambda self: self._parse_cast(False),
 698        "TRY_CONVERT": lambda self: self._parse_convert(False),
 699    }
 700
 701    QUERY_MODIFIER_PARSERS = {
 702        "joins": lambda self: list(iter(self._parse_join, None)),
 703        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 704        "match": lambda self: self._parse_match_recognize(),
 705        "where": lambda self: self._parse_where(),
 706        "group": lambda self: self._parse_group(),
 707        "having": lambda self: self._parse_having(),
 708        "qualify": lambda self: self._parse_qualify(),
 709        "windows": lambda self: self._parse_window_clause(),
 710        "order": lambda self: self._parse_order(),
 711        "limit": lambda self: self._parse_limit(),
 712        "offset": lambda self: self._parse_offset(),
 713        "locks": lambda self: self._parse_locks(),
 714        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 715    }
 716
 717    SET_PARSERS = {
 718        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 719        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 720        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 721        "TRANSACTION": lambda self: self._parse_set_transaction(),
 722    }
 723
 724    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 725
 726    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 727
 728    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 729
 730    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 731
 732    TRANSACTION_CHARACTERISTICS = {
 733        "ISOLATION LEVEL REPEATABLE READ",
 734        "ISOLATION LEVEL READ COMMITTED",
 735        "ISOLATION LEVEL READ UNCOMMITTED",
 736        "ISOLATION LEVEL SERIALIZABLE",
 737        "READ WRITE",
 738        "READ ONLY",
 739    }
 740
    # Keywords accepted as insert alternatives (e.g. INSERT OR REPLACE).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Kinds accepted in a CLONE clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Tokens usable as a window alias; ROWS is excluded (presumably to avoid
    # ambiguity with the frame clause — confirm).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # When False, CAST(...) parses as exp.TryCast instead of exp.Cast
    # (see COLUMN_OPERATORS and FUNCTION_PARSERS above).
    STRICT_CAST = True

    # Dialect knobs consumed by _parse_* helpers outside this listing;
    # subclasses override them to match dialect behavior.
    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
 759
    # Restrict instances to exactly these attributes (smaller instances, and
    # typos raise AttributeError). Must stay in sync with the assignments in
    # __init__ and reset(); the tries are populated by the _Parser metaclass.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 779
 780    def __init__(
 781        self,
 782        error_level: t.Optional[ErrorLevel] = None,
 783        error_message_context: int = 100,
 784        index_offset: int = 0,
 785        unnest_column_only: bool = False,
 786        alias_post_tablesample: bool = False,
 787        max_errors: int = 3,
 788        null_ordering: t.Optional[str] = None,
 789    ):
 790        self.error_level = error_level or ErrorLevel.IMMEDIATE
 791        self.error_message_context = error_message_context
 792        self.index_offset = index_offset
 793        self.unnest_column_only = unnest_column_only
 794        self.alias_post_tablesample = alias_post_tablesample
 795        self.max_errors = max_errors
 796        self.null_ordering = null_ordering
 797        self.reset()
 798
 799    def reset(self):
 800        self.sql = ""
 801        self.errors = []
 802        self._tokens = []
 803        self._index = 0
 804        self._curr = None
 805        self._next = None
 806        self._prev = None
 807        self._prev_comments = None
 808
 809    def parse(
 810        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 811    ) -> t.List[t.Optional[exp.Expression]]:
 812        """
 813        Parses a list of tokens and returns a list of syntax trees, one tree
 814        per parsed SQL statement.
 815
 816        Args:
 817            raw_tokens: the list of tokens.
 818            sql: the original SQL string, used to produce helpful debug messages.
 819
 820        Returns:
 821            The list of syntax trees.
 822        """
 823        return self._parse(
 824            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 825        )
 826
 827    def parse_into(
 828        self,
 829        expression_types: exp.IntoType,
 830        raw_tokens: t.List[Token],
 831        sql: t.Optional[str] = None,
 832    ) -> t.List[t.Optional[exp.Expression]]:
 833        """
 834        Parses a list of tokens into a given Expression type. If a collection of Expression
 835        types is given instead, this method will try to parse the token list into each one
 836        of them, stopping at the first for which the parsing succeeds.
 837
 838        Args:
 839            expression_types: the expression type(s) to try and parse the token list into.
 840            raw_tokens: the list of tokens.
 841            sql: the original SQL string, used to produce helpful debug messages.
 842
 843        Returns:
 844            The target Expression.
 845        """
 846        errors = []
 847        for expression_type in ensure_collection(expression_types):
 848            parser = self.EXPRESSION_PARSERS.get(expression_type)
 849            if not parser:
 850                raise TypeError(f"No parser registered for {expression_type}")
 851            try:
 852                return self._parse(parser, raw_tokens, sql)
 853            except ParseError as e:
 854                e.errors[0]["into_expression"] = expression_type
 855                errors.append(e)
 856        raise ParseError(
 857            f"Failed to parse into {expression_types}",
 858            errors=merge_errors(errors),
 859        ) from errors[-1]
 860
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split *raw_tokens* on semicolons and run *parse_method* once per statement."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Partition the token stream into one chunk per statement. Semicolons are
        # dropped; a trailing semicolon does not open a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Position the cursor just before the first token; _advance moves to it.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left unconsumed mean the statement was not fully parsed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            # Surface accumulated errors per-statement, honoring the error level.
            self.check_errors()

        return expressions
 894
 895    def check_errors(self) -> None:
 896        """
 897        Logs or raises any found errors, depending on the chosen error level setting.
 898        """
 899        if self.error_level == ErrorLevel.WARN:
 900            for error in self.errors:
 901                logger.error(str(error))
 902        elif self.error_level == ErrorLevel.RAISE and self.errors:
 903            raise ParseError(
 904                concat_messages(self.errors, self.max_errors),
 905                errors=merge_errors(self.errors),
 906            )
 907
 908    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 909        """
 910        Appends an error in the list of recorded errors or raises it, depending on the chosen
 911        error level setting.
 912        """
 913        token = token or self._curr or self._prev or Token.string("")
 914        start = token.start
 915        end = token.end + 1
 916        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 917        highlight = self.sql[start:end]
 918        end_context = self.sql[end : end + self.error_message_context]
 919
 920        error = ParseError.new(
 921            f"{message}. Line {token.line}, Col: {token.col}.\n"
 922            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 923            description=message,
 924            line=token.line,
 925            col=token.col,
 926            start_context=start_context,
 927            highlight=highlight,
 928            end_context=end_context,
 929        )
 930
 931        if self.error_level == ErrorLevel.IMMEDIATE:
 932            raise error
 933
 934        self.errors.append(error)
 935
 936    def expression(
 937        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 938    ) -> E:
 939        """
 940        Creates a new, validated Expression.
 941
 942        Args:
 943            exp_class: the expression class to instantiate.
 944            comments: an optional list of comments to attach to the expression.
 945            kwargs: the arguments to set for the expression along with their respective values.
 946
 947        Returns:
 948            The target expression.
 949        """
 950        instance = exp_class(**kwargs)
 951        instance.add_comments(comments) if comments else self._add_comments(instance)
 952        self.validate_expression(instance)
 953        return instance
 954
 955    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 956        if expression and self._prev_comments:
 957            expression.add_comments(self._prev_comments)
 958            self._prev_comments = None
 959
    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        # In IGNORE mode validation is skipped entirely - no errors are even recorded.
        if self.error_level == ErrorLevel.IGNORE:
            return

        # Each message is routed through raise_error, which raises or accumulates
        # depending on the configured error level.
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)
 976
 977    def _find_sql(self, start: Token, end: Token) -> str:
 978        return self.sql[start.start : end.end + 1]
 979
    def _advance(self, times: int = 1) -> None:
        """Move the cursor *times* tokens forward and refresh _curr/_next/_prev."""
        self._index += times
        # seq_get returns None past either end, so _curr/_next are None at stream edges.
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            # Comments ride along with the previously consumed token.
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None
 990
 991    def _retreat(self, index: int) -> None:
 992        if index != self._index:
 993            self._advance(index - self._index)
 994
    def _parse_command(self) -> exp.Command:
        """Wrap the previous token's text (plus an optional string literal) as an opaque Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 997
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT [IF EXISTS] ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # `and` yields the matched token itself (or a falsy value when unmatched).
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown object kind: fall back to treating the rest as a raw command.
            return self._parse_as_command(start)

        # The object reference is parsed differently depending on its kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1023
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: return the bare expression.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET <aggregates> is only valid after a GROUP BY clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1061
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement: a registered statement, a raw command, or a query."""
        # Empty chunk (e.g. stray semicolons) - nothing to parse.
        if self._curr is None:
            return None

        # Dispatch on the leading token if a dedicated statement parser exists.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens the tokenizer classifies as commands are kept opaque.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise it's either a bare expression (possibly part of a set
        # operation) or a SELECT-like query; both can take query modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
1075
    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP [TEMPORARY] [MATERIALIZED] <kind> ... into an exp.Drop node."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind - keep the statement as an opaque command.
            return self._parse_as_command(start)

        # NOTE: keyword arguments are evaluated left to right, so the order below
        # (exists, table, then the trailing option keywords) matches token order.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
1095
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Consume an optional IF [NOT] EXISTS sequence.

        Returns a truthy value only when the full sequence matched; the chained
        `and` short-circuits, so the result may be False/None rather than a strict bool.
        """
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1102
1103    def _parse_create(self) -> t.Optional[exp.Expression]:
1104        start = self._prev
1105        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1106            TokenType.OR, TokenType.REPLACE
1107        )
1108        unique = self._match(TokenType.UNIQUE)
1109
1110        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1111            self._match(TokenType.TABLE)
1112
1113        properties = None
1114        create_token = self._match_set(self.CREATABLES) and self._prev
1115
1116        if not create_token:
1117            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1118            create_token = self._match_set(self.CREATABLES) and self._prev
1119
1120            if not properties or not create_token:
1121                return self._parse_as_command(start)
1122
1123        exists = self._parse_exists(not_=True)
1124        this = None
1125        expression = None
1126        indexes = None
1127        no_schema_binding = None
1128        begin = None
1129        clone = None
1130
1131        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1132            this = self._parse_user_defined_function(kind=create_token.token_type)
1133            temp_properties = self._parse_properties()
1134            if properties and temp_properties:
1135                properties.expressions.extend(temp_properties.expressions)
1136            elif temp_properties:
1137                properties = temp_properties
1138
1139            self._match(TokenType.ALIAS)
1140            begin = self._match(TokenType.BEGIN)
1141            return_ = self._match_text_seq("RETURN")
1142            expression = self._parse_statement()
1143
1144            if return_:
1145                expression = self.expression(exp.Return, this=expression)
1146        elif create_token.token_type == TokenType.INDEX:
1147            this = self._parse_index(index=self._parse_id_var())
1148        elif create_token.token_type in self.DB_CREATABLES:
1149            table_parts = self._parse_table_parts(schema=True)
1150
1151            # exp.Properties.Location.POST_NAME
1152            if self._match(TokenType.COMMA):
1153                temp_properties = self._parse_properties(before=True)
1154                if properties and temp_properties:
1155                    properties.expressions.extend(temp_properties.expressions)
1156                elif temp_properties:
1157                    properties = temp_properties
1158
1159            this = self._parse_schema(this=table_parts)
1160
1161            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1162            temp_properties = self._parse_properties()
1163            if properties and temp_properties:
1164                properties.expressions.extend(temp_properties.expressions)
1165            elif temp_properties:
1166                properties = temp_properties
1167
1168            self._match(TokenType.ALIAS)
1169
1170            # exp.Properties.Location.POST_ALIAS
1171            if not (
1172                self._match(TokenType.SELECT, advance=False)
1173                or self._match(TokenType.WITH, advance=False)
1174                or self._match(TokenType.L_PAREN, advance=False)
1175            ):
1176                temp_properties = self._parse_properties()
1177                if properties and temp_properties:
1178                    properties.expressions.extend(temp_properties.expressions)
1179                elif temp_properties:
1180                    properties = temp_properties
1181
1182            expression = self._parse_ddl_select()
1183
1184            if create_token.token_type == TokenType.TABLE:
1185                indexes = []
1186                while True:
1187                    index = self._parse_index()
1188
1189                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1190                    temp_properties = self._parse_properties()
1191                    if properties and temp_properties:
1192                        properties.expressions.extend(temp_properties.expressions)
1193                    elif temp_properties:
1194                        properties = temp_properties
1195
1196                    if not index:
1197                        break
1198                    else:
1199                        self._match(TokenType.COMMA)
1200                        indexes.append(index)
1201            elif create_token.token_type == TokenType.VIEW:
1202                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1203                    no_schema_binding = True
1204
1205            if self._match_text_seq("CLONE"):
1206                clone = self._parse_table(schema=True)
1207                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1208                clone_kind = (
1209                    self._match(TokenType.L_PAREN)
1210                    and self._match_texts(self.CLONE_KINDS)
1211                    and self._prev.text.upper()
1212                )
1213                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1214                self._match(TokenType.R_PAREN)
1215                clone = self.expression(
1216                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1217                )
1218
1219        return self.expression(
1220            exp.Create,
1221            this=this,
1222            kind=create_token.text,
1223            replace=replace,
1224            unique=unique,
1225            expression=expression,
1226            exists=exists,
1227            properties=properties,
1228            indexes=indexes,
1229            no_schema_binding=no_schema_binding,
1230            begin=begin,
1231            clone=clone,
1232        )
1233
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property modifier prefix and dispatch to the matching property parser."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: dict literals evaluate their values in order, and each
        # _match_* call consumes tokens, so the key order here is significant.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature rejected the modifiers - report, don't crash.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1258
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, returning None when nothing matches."""
        # Registered keyword-driven property parsers take precedence.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: a generic `key = value` assignment (key may be a var or string).
        # advance=False peeks without consuming; the pair is consumed below.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1282
1283    def _parse_stored(self) -> exp.Expression:
1284        self._match(TokenType.ALIAS)
1285
1286        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1287        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1288
1289        return self.expression(
1290            exp.FileFormatProperty,
1291            this=self.expression(
1292                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1293            )
1294            if input_format or output_format
1295            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1296        )
1297
    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parse an optional `=`/`AS` followed by a field, wrapped in *exp_class*."""
        # Both separators are optional; the match order consumes `= AS` forms too.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())
1302
1303    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1304        properties = []
1305
1306        while True:
1307            if before:
1308                prop = self._parse_property_before()
1309            else:
1310                prop = self._parse_property()
1311
1312            if not prop:
1313                break
1314            for p in ensure_list(prop):
1315                properties.append(p)
1316
1317        if properties:
1318            return self.expression(exp.Properties, expressions=properties)
1319
1320        return None
1321
    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        """Parse a [NO] FALLBACK [PROTECTION] property (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )
1326
1327    def _parse_volatile_property(self) -> exp.Expression:
1328        if self._index >= 2:
1329            pre_volatile_token = self._tokens[self._index - 2]
1330        else:
1331            pre_volatile_token = None
1332
1333        if pre_volatile_token and pre_volatile_token.token_type in (
1334            TokenType.CREATE,
1335            TokenType.REPLACE,
1336            TokenType.UNIQUE,
1337        ):
1338            return exp.VolatileProperty()
1339
1340        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1341
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the various WITH-prefixed properties; match order below is significant."""
        self._match(TokenType.WITH)
        # WITH ( prop, prop, ... ) - a parenthesized property list.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        # Nothing follows the WITH keyword - bail out.
        if not self._next:
            return None

        return self._parse_withisolatedloading()
1361
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse a MySQL DEFINER = user@host clause."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may tokenize as an identifier or, e.g. for `%`, as a MOD token.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        # Both parts are required; otherwise the clause is not a valid definer.
        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
1374
    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1379
    def _parse_log(self, no: bool = False) -> exp.Expression:
        """Build a [NO] LOG property node; the keyword itself was already consumed."""
        return self.expression(exp.LogProperty, no=no)
1382
    def _parse_journal(self, **kwargs) -> exp.Expression:
        """Build a JOURNAL property from the modifier flags collected by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)
1385
1386    def _parse_checksum(self) -> exp.Expression:
1387        self._match(TokenType.EQ)
1388
1389        on = None
1390        if self._match(TokenType.ON):
1391            on = True
1392        elif self._match_text_seq("OFF"):
1393            on = False
1394        default = self._match(TokenType.DEFAULT)
1395
1396        return self.expression(
1397            exp.ChecksumProperty,
1398            on=on,
1399            default=default,
1400        )
1401
    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        """Parse CLUSTER BY <ordered exprs>, or back off if BY does not follow."""
        if not self._match_text_seq("BY"):
            # Un-consume the token that routed us here so other parsers can try it.
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )
1410
1411    def _parse_freespace(self) -> exp.Expression:
1412        self._match(TokenType.EQ)
1413        return self.expression(
1414            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1415        )
1416
1417    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1418        if self._match(TokenType.EQ):
1419            return self.expression(
1420                exp.MergeBlockRatioProperty,
1421                this=self._parse_number(),
1422                percent=self._match(TokenType.PERCENT),
1423            )
1424        return self.expression(
1425            exp.MergeBlockRatioProperty,
1426            no=no,
1427            default=default,
1428        )
1429
1430    def _parse_datablocksize(
1431        self,
1432        default: t.Optional[bool] = None,
1433        minimum: t.Optional[bool] = None,
1434        maximum: t.Optional[bool] = None,
1435    ) -> exp.Expression:
1436        self._match(TokenType.EQ)
1437        size = self._parse_number()
1438        units = None
1439        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1440            units = self._prev.text
1441        return self.expression(
1442            exp.DataBlocksizeProperty,
1443            size=size,
1444            units=units,
1445            default=default,
1446            minimum=minimum,
1447            maximum=maximum,
1448        )
1449
    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)] (Teradata)."""
        self._match(TokenType.EQ)
        # Each match consumes its keyword; at most one of these is expected to hit.
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            # AUTOTEMP carries a parenthesized column schema.
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
1468
    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] (Teradata)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # At most one FOR clause matches; the others leave the stream untouched.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1484
    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE (Teradata)."""
        # Object kind - TABLE/VIEW/ROW are real tokens, DATABASE is matched as text.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a target reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Lock type keywords; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1534
1535    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1536        if self._match(TokenType.PARTITION_BY):
1537            return self._parse_csv(self._parse_conjunction)
1538        return []
1539
    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse PARTITIONED BY [=] <schema or field> into an exp.PartitionedByProperty."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            # Either a parenthesized column schema or a single (possibly bracketed) field.
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
1546
1547    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1548        if self._match_text_seq("AND", "STATISTICS"):
1549            statistics = True
1550        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1551            statistics = False
1552        else:
1553            statistics = None
1554
1555        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1556
    def _parse_no_property(self) -> t.Optional[exp.Property]:
        """Parse the NO-prefixed properties (currently only NO PRIMARY INDEX)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None
1561
    def _parse_on_property(self) -> t.Optional[exp.Property]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS; None when neither form matches."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None
1568
1569    def _parse_distkey(self) -> exp.Expression:
1570        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1571
1572    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1573        table = self._parse_table(schema=True)
1574        options = []
1575        while self._match_texts(("INCLUDING", "EXCLUDING")):
1576            this = self._prev.text.upper()
1577            id_var = self._parse_id_var()
1578
1579            if not id_var:
1580                return None
1581
1582            options.append(
1583                self.expression(
1584                    exp.Property,
1585                    this=this,
1586                    value=exp.Var(this=id_var.this.upper()),
1587                )
1588            )
1589        return self.expression(exp.LikeProperty, this=table, expressions=options)
1590
1591    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1592        return self.expression(
1593            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1594        )
1595
1596    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1597        self._match(TokenType.EQ)
1598        return self.expression(
1599            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1600        )
1601
1602    def _parse_returns(self) -> exp.Expression:
1603        value: t.Optional[exp.Expression]
1604        is_table = self._match(TokenType.TABLE)
1605
1606        if is_table:
1607            if self._match(TokenType.LT):
1608                value = self.expression(
1609                    exp.Schema,
1610                    this="TABLE",
1611                    expressions=self._parse_csv(self._parse_struct_types),
1612                )
1613                if not self._match(TokenType.GT):
1614                    self.raise_error("Expecting >")
1615            else:
1616                value = self._parse_schema(exp.Var(this="TABLE"))
1617        else:
1618            value = self._parse_types()
1619
1620        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1621
1622    def _parse_describe(self) -> exp.Expression:
1623        kind = self._match_set(self.CREATABLES) and self._prev.text
1624        this = self._parse_table()
1625
1626        return self.expression(exp.Describe, this=this, kind=kind)
1627
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (the INSERT keyword is already consumed).

        Handles both `INSERT [OVERWRITE] [LOCAL] DIRECTORY ...` targets and the
        regular `INSERT [OR <alternative>] [INTO] [TABLE] <table> ...` form.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Target is a directory path rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE — keyword set comes from INSERT_ALTERNATIVES.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # Keyword arguments evaluate left-to-right, so the parse order below
        # (exists, partition, select, conflict, returning) is significant.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1659
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...` after an INSERT.

        Returns None when neither clause is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # The conflict target is either a named constraint or a value list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1693
1694    def _parse_returning(self) -> t.Optional[exp.Expression]:
1695        if not self._match(TokenType.RETURNING):
1696            return None
1697
1698        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1699
1700    def _parse_row(self) -> t.Optional[exp.Expression]:
1701        if not self._match(TokenType.FORMAT):
1702            return None
1703        return self._parse_row_format()
1704
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT clause, either SERDE or DELIMITED.

        Args:
            match_row: when True, require the leading `ROW FORMAT` tokens and
                return None if they are absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional, but because the checks are sequential an
        # earlier clause cannot appear after a later one.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1730
    def _parse_load(self) -> exp.Expression:
        """Parse `LOAD DATA [LOCAL] INPATH <path> [OVERWRITE] INTO TABLE ...`.

        Any other LOAD variant is handed off to ``_parse_as_command`` instead.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # Keyword args evaluate left-to-right; table, partition, INPUTFORMAT
            # and SERDE must be consumed in this order.
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1750
1751    def _parse_delete(self) -> exp.Expression:
1752        self._match(TokenType.FROM)
1753
1754        return self.expression(
1755            exp.Delete,
1756            this=self._parse_table(),
1757            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1758            where=self._parse_where(),
1759            returning=self._parse_returning(),
1760        )
1761
1762    def _parse_update(self) -> exp.Expression:
1763        return self.expression(
1764            exp.Update,
1765            **{  # type: ignore
1766                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1767                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1768                "from": self._parse_from(modifiers=True),
1769                "where": self._parse_where(),
1770                "returning": self._parse_returning(),
1771            },
1772        )
1773
1774    def _parse_uncache(self) -> exp.Expression:
1775        if not self._match(TokenType.TABLE):
1776            self.raise_error("Expecting TABLE after UNCACHE")
1777
1778        return self.expression(
1779            exp.Uncache,
1780            exists=self._parse_exists(),
1781            this=self._parse_table(schema=True),
1782        )
1783
1784    def _parse_cache(self) -> exp.Expression:
1785        lazy = self._match_text_seq("LAZY")
1786        self._match(TokenType.TABLE)
1787        table = self._parse_table(schema=True)
1788        options = []
1789
1790        if self._match_text_seq("OPTIONS"):
1791            self._match_l_paren()
1792            k = self._parse_string()
1793            self._match(TokenType.EQ)
1794            v = self._parse_string()
1795            options = [k, v]
1796            self._match_r_paren()
1797
1798        self._match(TokenType.ALIAS)
1799        return self.expression(
1800            exp.Cache,
1801            this=table,
1802            lazy=lazy,
1803            options=options,
1804            expression=self._parse_select(nested=True),
1805        )
1806
1807    def _parse_partition(self) -> t.Optional[exp.Expression]:
1808        if not self._match(TokenType.PARTITION):
1809            return None
1810
1811        return self.expression(
1812            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1813        )
1814
1815    def _parse_value(self) -> exp.Expression:
1816        if self._match(TokenType.L_PAREN):
1817            expressions = self._parse_csv(self._parse_conjunction)
1818            self._match_r_paren()
1819            return self.expression(exp.Tuple, expressions=expressions)
1820
1821        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1822        # Source: https://prestodb.io/docs/current/sql/values.html
1823        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1824
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a CTE-prefixed statement, a parenthesized
        subquery/table (when `nested`/`table` is set), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # Reached only if raise_error did not raise (tolerant error level).
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # top=True handles TOP-style limits appearing before the projections.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1904
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated list of CTEs.

        Args:
            skip_with_token: when True, assume the caller already consumed the
                WITH keyword.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH keyword between CTEs (after a comma,
                # this consumes it; note _match short-circuits in the condition).
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1924
1925    def _parse_cte(self) -> exp.Expression:
1926        alias = self._parse_table_alias()
1927        if not alias or not alias.this:
1928            self.raise_error("Expected CTE to have alias")
1929
1930        self._match(TokenType.ALIAS)
1931
1932        return self.expression(
1933            exp.CTE,
1934            this=self._parse_wrapped(self._parse_statement),
1935            alias=alias,
1936        )
1937
1938    def _parse_table_alias(
1939        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1940    ) -> t.Optional[exp.Expression]:
1941        any_token = self._match(TokenType.ALIAS)
1942        alias = (
1943            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1944            or self._parse_string_as_identifier()
1945        )
1946
1947        index = self._index
1948        if self._match(TokenType.L_PAREN):
1949            columns = self._parse_csv(self._parse_function_parameter)
1950            self._match_r_paren() if columns else self._retreat(index)
1951        else:
1952            columns = None
1953
1954        if not alias and not columns:
1955            return None
1956
1957        return self.expression(exp.TableAlias, this=alias, columns=columns)
1958
1959    def _parse_subquery(
1960        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1961    ) -> exp.Expression:
1962        return self.expression(
1963            exp.Subquery,
1964            this=this,
1965            pivots=self._parse_pivots(),
1966            alias=self._parse_table_alias() if parse_alias else None,
1967        )
1968
1969    def _parse_query_modifiers(
1970        self, this: t.Optional[exp.Expression]
1971    ) -> t.Optional[exp.Expression]:
1972        if isinstance(this, self.MODIFIABLES):
1973            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1974                expression = parser(self)
1975
1976                if expression:
1977                    this.set(key, expression)
1978        return this
1979
1980    def _parse_hint(self) -> t.Optional[exp.Expression]:
1981        if self._match(TokenType.HINT):
1982            hints = self._parse_csv(self._parse_function)
1983            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1984                self.raise_error("Expected */ after HINT")
1985            return self.expression(exp.Hint, expressions=hints)
1986
1987        return None
1988
1989    def _parse_into(self) -> t.Optional[exp.Expression]:
1990        if not self._match(TokenType.INTO):
1991            return None
1992
1993        temp = self._match(TokenType.TEMPORARY)
1994        unlogged = self._match_text_seq("UNLOGGED")
1995        self._match(TokenType.TABLE)
1996
1997        return self.expression(
1998            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1999        )
2000
2001    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2002        if not self._match(TokenType.FROM):
2003            return None
2004
2005        comments = self._prev_comments
2006        this = self._parse_table()
2007
2008        return self.expression(
2009            exp.From,
2010            comments=comments,
2011            this=self._parse_query_modifiers(this) if modifiers else this,
2012        )
2013
2014    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2015        if not self._match(TokenType.MATCH_RECOGNIZE):
2016            return None
2017
2018        self._match_l_paren()
2019
2020        partition = self._parse_partition_by()
2021        order = self._parse_order()
2022        measures = (
2023            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2024        )
2025
2026        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2027            rows = exp.Var(this="ONE ROW PER MATCH")
2028        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2029            text = "ALL ROWS PER MATCH"
2030            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2031                text += f" SHOW EMPTY MATCHES"
2032            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2033                text += f" OMIT EMPTY MATCHES"
2034            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2035                text += f" WITH UNMATCHED ROWS"
2036            rows = exp.Var(this=text)
2037        else:
2038            rows = None
2039
2040        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2041            text = "AFTER MATCH SKIP"
2042            if self._match_text_seq("PAST", "LAST", "ROW"):
2043                text += f" PAST LAST ROW"
2044            elif self._match_text_seq("TO", "NEXT", "ROW"):
2045                text += f" TO NEXT ROW"
2046            elif self._match_text_seq("TO", "FIRST"):
2047                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2048            elif self._match_text_seq("TO", "LAST"):
2049                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2050            after = exp.Var(this=text)
2051        else:
2052            after = None
2053
2054        if self._match_text_seq("PATTERN"):
2055            self._match_l_paren()
2056
2057            if not self._curr:
2058                self.raise_error("Expecting )", self._curr)
2059
2060            paren = 1
2061            start = self._curr
2062
2063            while self._curr and paren > 0:
2064                if self._curr.token_type == TokenType.L_PAREN:
2065                    paren += 1
2066                if self._curr.token_type == TokenType.R_PAREN:
2067                    paren -= 1
2068                end = self._prev
2069                self._advance()
2070            if paren > 0:
2071                self.raise_error("Expecting )", self._curr)
2072            pattern = exp.Var(this=self._find_sql(start, end))
2073        else:
2074            pattern = None
2075
2076        define = (
2077            self._parse_csv(
2078                lambda: self.expression(
2079                    exp.Alias,
2080                    alias=self._parse_id_var(any_token=True),
2081                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2082                )
2083            )
2084            if self._match_text_seq("DEFINE")
2085            else None
2086        )
2087
2088        self._match_r_paren()
2089
2090        return self.expression(
2091            exp.MatchRecognize,
2092            partition_by=partition,
2093            order=order,
2094            measures=measures,
2095            rows=rows,
2096            after=after,
2097            pattern=pattern,
2098            define=define,
2099            alias=self._parse_table_alias(),
2100        )
2101
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs.

        Returns None when none of the introducing keywords is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is represented as an outer lateral; CROSS APPLY is not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: `<table> [AS <col>, ...]` names the generated
            # table and its columns.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2143
2144    def _parse_join_side_and_kind(
2145        self,
2146    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2147        return (
2148            self._match(TokenType.NATURAL) and self._prev,
2149            self._match_set(self.JOIN_SIDES) and self._prev,
2150            self._match_set(self.JOIN_KINDS) and self._prev,
2151        )
2152
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and CROSS/OUTER APPLY.

        Args:
            skip_join_token: when True, the JOIN keyword is optional (already
                consumed or implied by the caller).
        """
        if self._match(TokenType.COMMA):
            # Implicit (comma) join.
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: undo the side/kind consumption before the APPLY checks.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2196
2197    def _parse_index(
2198        self,
2199        index: t.Optional[exp.Expression] = None,
2200    ) -> t.Optional[exp.Expression]:
2201        if index:
2202            unique = None
2203            primary = None
2204            amp = None
2205
2206            self._match(TokenType.ON)
2207            self._match(TokenType.TABLE)  # hive
2208            table = self._parse_table_parts(schema=True)
2209        else:
2210            unique = self._match(TokenType.UNIQUE)
2211            primary = self._match_text_seq("PRIMARY")
2212            amp = self._match_text_seq("AMP")
2213            if not self._match(TokenType.INDEX):
2214                return None
2215            index = self._parse_id_var()
2216            table = None
2217
2218        if self._match(TokenType.L_PAREN, advance=False):
2219            columns = self._parse_wrapped_csv(self._parse_ordered)
2220        else:
2221            columns = None
2222
2223        return self.expression(
2224            exp.Index,
2225            this=index,
2226            table=table,
2227            columns=columns,
2228            unique=unique,
2229            primary=primary,
2230            amp=amp,
2231        )
2232
2233    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2234        return (
2235            (not schema and self._parse_function())
2236            or self._parse_id_var(any_token=False)
2237            or self._parse_string_as_identifier()
2238            or self._parse_placeholder()
2239        )
2240
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dot-separated table name into (catalog, db, table) parts.

        More than three dotted parts are folded into nested Dot expressions in
        the table slot.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift the parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2263
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        table reference with optional alias, pivots, hints and table sample.

        Args:
            schema: parse the table as a schema target (skips alias/sample/hint
                handling and returns a Schema wrapper).
            alias_tokens: token types permitted as the table alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # alias_post_tablesample controls whether the sample clause is parsed
        # before or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # WITH ( ... ) table hints.
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2315
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table factor, or return None if absent."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only mode the single alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default offset column name when no explicit alias is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2342
2343    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2344        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2345        if not is_derived and not self._match(TokenType.VALUES):
2346            return None
2347
2348        expressions = self._parse_csv(self._parse_value)
2349
2350        if is_derived:
2351            self._match_r_paren()
2352
2353        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2354
2355    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2356        if not self._match(TokenType.TABLE_SAMPLE) and not (
2357            as_modifier and self._match_text_seq("USING", "SAMPLE")
2358        ):
2359            return None
2360
2361        bucket_numerator = None
2362        bucket_denominator = None
2363        bucket_field = None
2364        percent = None
2365        rows = None
2366        size = None
2367        seed = None
2368
2369        kind = (
2370            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2371        )
2372        method = self._parse_var(tokens=(TokenType.ROW,))
2373
2374        self._match(TokenType.L_PAREN)
2375
2376        num = self._parse_number()
2377
2378        if self._match_text_seq("BUCKET"):
2379            bucket_numerator = self._parse_number()
2380            self._match_text_seq("OUT", "OF")
2381            bucket_denominator = bucket_denominator = self._parse_number()
2382            self._match(TokenType.ON)
2383            bucket_field = self._parse_field()
2384        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2385            percent = num
2386        elif self._match(TokenType.ROWS):
2387            rows = num
2388        else:
2389            size = num
2390
2391        self._match(TokenType.R_PAREN)
2392
2393        if self._match(TokenType.L_PAREN):
2394            method = self._parse_var()
2395            seed = self._match(TokenType.COMMA) and self._parse_number()
2396            self._match_r_paren()
2397        elif self._match_texts(("SEED", "REPEATABLE")):
2398            seed = self._parse_wrapped(self._parse_number)
2399
2400        return self.expression(
2401            exp.TableSample,
2402            method=method,
2403            bucket_numerator=bucket_numerator,
2404            bucket_denominator=bucket_denominator,
2405            bucket_field=bucket_field,
2406            percent=percent,
2407            rows=rows,
2408            size=size,
2409            seed=seed,
2410            kind=kind,
2411        )
2412
2413    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2414        return list(iter(self._parse_pivot, None))
2415
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause into an exp.Pivot node.

        Returns None (after rewinding the token stream) when the upcoming
        tokens do not actually form a pivot clause. For PIVOT, the output
        column names are derived from the aggregation aliases and the IN-list
        values and attached to the node's "columns" arg.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(" - not a pivot clause after all
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregation calls, each optionally aliased
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # alias=True so the IN-list entries may carry aliases (used for column names)
        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may be followed by a table alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Build one output column name per (IN value, aggregation-name) pair
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2475
2476    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2477        return [agg.alias for agg in aggregations]
2478
2479    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2480        if not skip_where_token and not self._match(TokenType.WHERE):
2481            return None
2482
2483        return self.expression(
2484            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2485        )
2486
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and WITH TOTALS.

        The loop keeps consuming grouping constructs until no special form
        matched, so mixed lists like `GROUP BY a, GROUPING SETS (...), ROLLUP(b)`
        are all folded into a single exp.Group node.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each kind of grouping construct under its own key
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE take no argument list (stored as True);
            # bare ROLLUP(...) / CUBE(...) carry their column lists
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once an iteration found no special grouping construct
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2523
2524    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2525        if not self._match(TokenType.GROUPING_SETS):
2526            return None
2527
2528        return self._parse_wrapped_csv(self._parse_grouping_set)
2529
2530    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2531        if self._match(TokenType.L_PAREN):
2532            grouping_set = self._parse_csv(self._parse_column)
2533            self._match_r_paren()
2534            return self.expression(exp.Tuple, expressions=grouping_set)
2535
2536        return self._parse_column()
2537
2538    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2539        if not skip_having_token and not self._match(TokenType.HAVING):
2540            return None
2541        return self.expression(exp.Having, this=self._parse_conjunction())
2542
2543    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2544        if not self._match(TokenType.QUALIFY):
2545            return None
2546        return self.expression(exp.Qualify, this=self._parse_conjunction())
2547
2548    def _parse_order(
2549        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2550    ) -> t.Optional[exp.Expression]:
2551        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2552            return this
2553
2554        return self.expression(
2555            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2556        )
2557
2558    def _parse_sort(
2559        self, exp_class: t.Type[exp.Expression], *texts: str
2560    ) -> t.Optional[exp.Expression]:
2561        if not self._match_text_seq(*texts):
2562            return None
2563        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2564
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY key: expression [ASC|DESC] [NULLS FIRST|LAST].

        When the null ordering is not spelled out, `nulls_first` is inferred
        from the dialect's `null_ordering` setting so the key can be
        transpiled faithfully to dialects with a different default.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        # _match returns None when unmatched, so normalize to real booleans
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2586
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`), or a FETCH FIRST/NEXT clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # Some dialects allow LIMIT (n); remember the paren so it gets closed
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            # ONLY is not stored on the node; it is matched purely to validate
            # that it isn't combined with WITH TIES
            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2625
2626    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2627        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2628            return this
2629
2630        count = self._parse_number()
2631        self._match_set((TokenType.ROW, TokenType.ROWS))
2632        return self.expression(exp.Offset, this=this, expression=count)
2633
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse trailing row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE.

        Several lock clauses may follow one another; each may name tables
        (OF ...) and a wait policy (NOWAIT / WAIT <n> / SKIP LOCKED).
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, a literal for WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2665
2666    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2667        if not self._match_set(self.SET_OPERATIONS):
2668            return this
2669
2670        token_type = self._prev.token_type
2671
2672        if token_type == TokenType.UNION:
2673            expression = exp.Union
2674        elif token_type == TokenType.EXCEPT:
2675            expression = exp.Except
2676        else:
2677            expression = exp.Intersect
2678
2679        return self.expression(
2680            expression,
2681            this=this,
2682            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2683            expression=self._parse_set_operations(self._parse_select(nested=True)),
2684        )
2685
2686    def _parse_expression(self) -> t.Optional[exp.Expression]:
2687        return self._parse_alias(self._parse_conjunction())
2688
2689    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2690        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2691
2692    def _parse_equality(self) -> t.Optional[exp.Expression]:
2693        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2694
2695    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2696        return self._parse_tokens(self._parse_range, self.COMPARISON)
2697
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)."""
        this = self._parse_bitwise()
        # A leading NOT negates the predicate that follows (e.g. NOT IN, NOT LIKE)
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The registered parser declined; keep the plain operand
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2724
2725    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2726        index = self._index - 1
2727        negate = self._match(TokenType.NOT)
2728        if self._match_text_seq("DISTINCT", "FROM"):
2729            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2730            return self.expression(klass, this=this, expression=self._parse_expression())
2731
2732        expression = self._parse_null() or self._parse_boolean()
2733        if not expression:
2734            self._retreat(index)
2735            return None
2736
2737        this = self.expression(exp.Is, this=this, expression=expression)
2738        return self.expression(exp.Not, this=this) if negate else this
2739
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse the right-hand side of an IN predicate.

        Supports IN UNNEST(...), IN (subquery), IN (expr, ...) and the bare
        field form `IN field`. `alias` lets list items carry aliases (used by
        PIVOT's IN-list).
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery is stored as `query`; anything else is a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2757
2758    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2759        low = self._parse_bitwise()
2760        self._match(TokenType.AND)
2761        high = self._parse_bitwise()
2762        return self.expression(exp.Between, this=this, low=low, high=high)
2763
2764    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2765        if not self._match(TokenType.ESCAPE):
2766            return this
2767        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2768
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing to `INTERVAL '<n>' <unit>`."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split e.g. '5 day' into the value literal and a unit Var
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2789
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            # Shifts are tokenized as two adjacent comparison tokens
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2812
2813    def _parse_term(self) -> t.Optional[exp.Expression]:
2814        return self._parse_tokens(self._parse_factor, self.TERM)
2815
2816    def _parse_factor(self) -> t.Optional[exp.Expression]:
2817        return self._parse_tokens(self._parse_unary, self.FACTOR)
2818
2819    def _parse_unary(self) -> t.Optional[exp.Expression]:
2820        if self._match_set(self.UNARY_PARSERS):
2821            return self.UNARY_PARSERS[self._prev.token_type](self)
2822        return self._parse_at_time_zone(self._parse_type())
2823
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a value that may start with a type: intervals, casts such as
        `DATE '2020-01-01'`, a bare data type, or a plain column expression.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # <type> '<literal>' is a cast, e.g. DATE '2020-01-01'
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type followed by something else: reparse as a column
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2845
2846    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2847        this = self._parse_type()
2848        if not this:
2849            return None
2850
2851        return self.expression(
2852            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2853        )
2854
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into an exp.DataType.

        Returns None (after rewinding) when the upcoming tokens don't form a
        type. With `check_func=True`, a parenthesized type - which could
        equally be a function call - is only accepted when a string literal
        follows; otherwise the parser rewinds and returns None.
        """
        index = self._index

        # Optional SYSUDTLIB. qualifier before the type name
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized argument list: struct fields, nested types, or sizes
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # With args it looks just like a function call; decided below
            maybe_func = True

        # "[]" suffixes build (possibly multi-dimensional) ARRAY types
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone "[" means this is indexing, not a type - rewind entirely
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, optionally
        # followed by a bracketed/parenthesized literal value list
        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        # Timestamps/times need their WITH/WITHOUT TIME ZONE suffix resolved
        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call reading
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        # Disambiguate type-with-args vs function call: only a following string
        # literal (e.g. CHAR(10) 'x') confirms the type reading
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2968
2969    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2970        this = self._parse_type() or self._parse_id_var()
2971        self._match(TokenType.COLON)
2972        return self._parse_column_def(this)
2973
2974    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2975        if not self._match_text_seq("AT", "TIME", "ZONE"):
2976            return this
2977        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2978
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column expression, including dotted qualification, ::casts,
        bracket subscripts and other operators from self.COLUMN_OPERATORS.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::" cast - the right side must be a type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Custom operator: consume one token as its literal operand
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: what we had parsed as the column is
                # actually a table/db/catalog qualifier of the new field
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3031
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a registered literal, a number like `.5`,
        or a parenthesized subquery/tuple/expression.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Leading-dot decimals: ".5" tokenizes as DOT NUMBER
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                # Carry the comments that preceded "(" onto the parsed node
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3075
3076    def _parse_field(
3077        self,
3078        any_token: bool = False,
3079        tokens: t.Optional[t.Collection[TokenType]] = None,
3080    ) -> t.Optional[exp.Expression]:
3081        return (
3082            self._parse_primary()
3083            or self._parse_function()
3084            or self._parse_id_var(any_token=any_token, tokens=tokens)
3085        )
3086
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Dispatch order: no-paren parsers, no-paren builtin functions, special
        FUNCTION_PARSERS, subquery predicates, registered builders from
        `functions` (default self.FUNCTIONS), and finally an exp.Anonymous
        node. With `anonymous=True`, registered builders are bypassed. The
        result is fed through _parse_window for a trailing OVER clause.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without "(", only keyword-style builtins qualify (e.g. CURRENT_DATE)
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it verbatim as an Anonymous call
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3140
3141    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3142        return self._parse_column_def(self._parse_id_var())
3143
3144    def _parse_user_defined_function(
3145        self, kind: t.Optional[TokenType] = None
3146    ) -> t.Optional[exp.Expression]:
3147        this = self._parse_id_var()
3148
3149        while self._match(TokenType.DOT):
3150            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3151
3152        if not self._match(TokenType.L_PAREN):
3153            return this
3154
3155        expressions = self._parse_csv(self._parse_function_parameter)
3156        self._match_r_paren()
3157        return self.expression(
3158            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3159        )
3160
3161    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3162        literal = self._parse_primary()
3163        if literal:
3164            return self.expression(exp.Introducer, this=token.text, expression=literal)
3165
3166        return self.expression(exp.Identifier, this=token.text)
3167
3168    def _parse_session_parameter(self) -> exp.Expression:
3169        kind = None
3170        this = self._parse_id_var() or self._parse_primary()
3171
3172        if this and self._match(TokenType.DOT):
3173            kind = this.name
3174            this = self._parse_var() or self._parse_primary()
3175
3176        return self.expression(exp.SessionParameter, this=this, kind=kind)
3177
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> ...`) if one follows;
        otherwise rewind and parse an ordinary argument expression.

        Args:
            alias: whether the fallback expression may carry an alias
                (used for functions with aliased arguments).
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all -- rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # A dialect-specific lambda arrow follows; delegate to its parser.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda here: rewind and parse the tokens as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # In `name = value` argument style the left side is a bare
                    # name, not a column reference, so replace it with a Var.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3209
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column / constraint list) attached to `this`.

        First probes for a nested SELECT: if one parses, `this` is returned
        unchanged so the caller can handle the subquery itself.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            # The probe is best-effort; any failure just means "not a select".
            pass
        finally:
            # Always rewind -- the probe must not consume tokens either way.
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3230
3231    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3232        # column defs are not really columns, they're identifiers
3233        if isinstance(this, exp.Column):
3234            this = this.this
3235        kind = self._parse_types()
3236
3237        if self._match_text_seq("FOR", "ORDINALITY"):
3238            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3239
3240        constraints = []
3241        while True:
3242            constraint = self._parse_column_constraint()
3243            if not constraint:
3244                break
3245            constraints.append(constraint)
3246
3247        if not kind and not constraints:
3248            return this
3249
3250        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3251
3252    def _parse_auto_increment(self) -> exp.Expression:
3253        start = None
3254        increment = None
3255
3256        if self._match(TokenType.L_PAREN, advance=False):
3257            args = self._parse_wrapped_csv(self._parse_bitwise)
3258            start = seq_get(args, 0)
3259            increment = seq_get(args, 1)
3260        elif self._match_text_seq("START"):
3261            start = self._parse_bitwise()
3262            self._match_text_seq("INCREMENT")
3263            increment = self._parse_bitwise()
3264
3265        if start and increment:
3266            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3267
3268        return exp.AutoIncrementColumnConstraint()
3269
3270    def _parse_compress(self) -> exp.Expression:
3271        if self._match(TokenType.L_PAREN, advance=False):
3272            return self.expression(
3273                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3274            )
3275
3276        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3277
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY [(options)] | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            # BY DEFAULT [ON NULL]: this=False marks the non-ALWAYS variant.
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options inside the parentheses; each is optional.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Without the IDENTITY keyword, the parentheses hold a
                # generation expression instead of sequence options.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3312
3313    def _parse_inline(self) -> t.Optional[exp.Expression]:
3314        self._match_text_seq("LENGTH")
3315        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3316
3317    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3318        if self._match_text_seq("NULL"):
3319            return self.expression(exp.NotNullColumnConstraint)
3320        if self._match_text_seq("CASESPECIFIC"):
3321            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3322        return None
3323
3324    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3325        if self._match(TokenType.CONSTRAINT):
3326            this = self._parse_id_var()
3327        else:
3328            this = None
3329
3330        if self._match_texts(self.CONSTRAINT_PARSERS):
3331            return self.expression(
3332                exp.ColumnConstraint,
3333                this=this,
3334                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3335            )
3336
3337        return this
3338
3339    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3340        if not self._match(TokenType.CONSTRAINT):
3341            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3342
3343        this = self._parse_id_var()
3344        expressions = []
3345
3346        while True:
3347            constraint = self._parse_unnamed_constraint() or self._parse_function()
3348            if not constraint:
3349                break
3350            expressions.append(constraint)
3351
3352        return self.expression(exp.Constraint, this=this, expressions=expressions)
3353
3354    def _parse_unnamed_constraint(
3355        self, constraints: t.Optional[t.Collection[str]] = None
3356    ) -> t.Optional[exp.Expression]:
3357        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3358            return None
3359
3360        constraint = self._prev.text.upper()
3361        if constraint not in self.CONSTRAINT_PARSERS:
3362            self.raise_error(f"No parser found for schema constraint {constraint}.")
3363
3364        return self.CONSTRAINT_PARSERS[constraint](self)
3365
3366    def _parse_unique(self) -> exp.Expression:
3367        if not self._match(TokenType.L_PAREN, advance=False):
3368            return self.expression(exp.UniqueColumnConstraint)
3369        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3370
3371    def _parse_key_constraint_options(self) -> t.List[str]:
3372        options = []
3373        while True:
3374            if not self._curr:
3375                break
3376
3377            if self._match(TokenType.ON):
3378                action = None
3379                on = self._advance_any() and self._prev.text
3380
3381                if self._match_text_seq("NO", "ACTION"):
3382                    action = "NO ACTION"
3383                elif self._match_text_seq("CASCADE"):
3384                    action = "CASCADE"
3385                elif self._match_pair(TokenType.SET, TokenType.NULL):
3386                    action = "SET NULL"
3387                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3388                    action = "SET DEFAULT"
3389                else:
3390                    self.raise_error("Invalid key constraint")
3391
3392                options.append(f"ON {on} {action}")
3393            elif self._match_text_seq("NOT", "ENFORCED"):
3394                options.append("NOT ENFORCED")
3395            elif self._match_text_seq("DEFERRABLE"):
3396                options.append("DEFERRABLE")
3397            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3398                options.append("INITIALLY DEFERRED")
3399            elif self._match_text_seq("NORELY"):
3400                options.append("NORELY")
3401            elif self._match_text_seq("MATCH", "FULL"):
3402                options.append("MATCH FULL")
3403            else:
3404                break
3405
3406        return options
3407
3408    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3409        if match and not self._match(TokenType.REFERENCES):
3410            return None
3411
3412        expressions = None
3413        this = self._parse_id_var()
3414
3415        if self._match(TokenType.L_PAREN, advance=False):
3416            expressions = self._parse_wrapped_id_vars()
3417
3418        options = self._parse_key_constraint_options()
3419        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3420
3421    def _parse_foreign_key(self) -> exp.Expression:
3422        expressions = self._parse_wrapped_id_vars()
3423        reference = self._parse_references()
3424        options = {}
3425
3426        while self._match(TokenType.ON):
3427            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3428                self.raise_error("Expected DELETE or UPDATE")
3429
3430            kind = self._prev.text.lower()
3431
3432            if self._match_text_seq("NO", "ACTION"):
3433                action = "NO ACTION"
3434            elif self._match(TokenType.SET):
3435                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3436                action = "SET " + self._prev.text.upper()
3437            else:
3438                self._advance()
3439                action = self._prev.text.upper()
3440
3441            options[kind] = action
3442
3443        return self.expression(
3444            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3445        )
3446
3447    def _parse_primary_key(self) -> exp.Expression:
3448        desc = (
3449            self._match_set((TokenType.ASC, TokenType.DESC))
3450            and self._prev.token_type == TokenType.DESC
3451        )
3452
3453        if not self._match(TokenType.L_PAREN, advance=False):
3454            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3455
3456        expressions = self._parse_wrapped_csv(self._parse_field)
3457        options = self._parse_key_constraint_options()
3458        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3459
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix on `this`: [..] subscripts / array literals,
        or {..} struct literals. Recurses to consume chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: slice with no start, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # Bare brackets (or ARRAY[...]) form an array literal.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained brackets, e.g. x[1][2], are handled by recursing.
        return self._parse_bracket(this)
3488
3489    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3490        if self._match(TokenType.COLON):
3491            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3492        return this
3493
3494    def _parse_case(self) -> t.Optional[exp.Expression]:
3495        ifs = []
3496        default = None
3497
3498        expression = self._parse_conjunction()
3499
3500        while self._match(TokenType.WHEN):
3501            this = self._parse_conjunction()
3502            self._match(TokenType.THEN)
3503            then = self._parse_conjunction()
3504            ifs.append(self.expression(exp.If, this=this, true=then))
3505
3506        if self._match(TokenType.ELSE):
3507            default = self._parse_conjunction()
3508
3509        if not self._match(TokenType.END):
3510            self.raise_error("Expected END after CASE", self._prev)
3511
3512        return self._parse_window(
3513            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3514        )
3515
3516    def _parse_if(self) -> t.Optional[exp.Expression]:
3517        if self._match(TokenType.L_PAREN):
3518            args = self._parse_csv(self._parse_conjunction)
3519            this = exp.If.from_arg_list(args)
3520            self.validate_expression(this, args)
3521            self._match_r_paren()
3522        else:
3523            index = self._index - 1
3524            condition = self._parse_conjunction()
3525
3526            if not condition:
3527                self._retreat(index)
3528                return None
3529
3530            self._match(TokenType.THEN)
3531            true = self._parse_conjunction()
3532            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3533            self._match(TokenType.END)
3534            this = self.expression(exp.If, this=condition, true=true, false=false)
3535
3536        return self._parse_window(this)
3537
3538    def _parse_extract(self) -> exp.Expression:
3539        this = self._parse_function() or self._parse_var() or self._parse_type()
3540
3541        if self._match(TokenType.FROM):
3542            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3543
3544        if not self._match(TokenType.COMMA):
3545            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3546
3547        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3548
3549    def _parse_cast(self, strict: bool) -> exp.Expression:
3550        this = self._parse_conjunction()
3551
3552        if not self._match(TokenType.ALIAS):
3553            if self._match(TokenType.COMMA):
3554                return self.expression(
3555                    exp.CastToStrType, this=this, expression=self._parse_string()
3556                )
3557            else:
3558                self.raise_error("Expected AS after CAST")
3559
3560        to = self._parse_types()
3561
3562        if not to:
3563            self.raise_error("Expected TYPE after CAST")
3564        elif to.this == exp.DataType.Type.CHAR:
3565            if self._match(TokenType.CHARACTER_SET):
3566                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3567
3568        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3569
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style arguments into an exp.GroupConcat node.

        Handles the plain comma form, the Postgres form with a trailing ORDER BY
        inside the call, and the WITHIN GROUP (ORDER BY ...) form -- all
        normalized so they can be transpiled to GROUP_CONCAT dialects.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: rewind past the consumed R_PAREN and finish.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3598
3599    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3600        to: t.Optional[exp.Expression]
3601        this = self._parse_bitwise()
3602
3603        if self._match(TokenType.USING):
3604            to = self.expression(exp.CharacterSet, this=self._parse_var())
3605        elif self._match(TokenType.COMMA):
3606            to = self._parse_bitwise()
3607        else:
3608            to = None
3609
3610        # Swap the argument order if needed to produce the correct AST
3611        if self.CONVERT_TYPE_FIRST:
3612            this, to = to, this
3613
3614        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3615
3616    def _parse_decode(self) -> t.Optional[exp.Expression]:
3617        """
3618        There are generally two variants of the DECODE function:
3619
3620        - DECODE(bin, charset)
3621        - DECODE(expression, search, result [, search, result] ... [, default])
3622
3623        The second variant will always be parsed into a CASE expression. Note that NULL
3624        needs special treatment, since we need to explicitly check for it with `IS NULL`,
3625        instead of relying on pattern matching.
3626        """
3627        args = self._parse_csv(self._parse_conjunction)
3628
3629        if len(args) < 3:
3630            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
3631
3632        expression, *expressions = args
3633        if not expression:
3634            return None
3635
3636        ifs = []
3637        for search, result in zip(expressions[::2], expressions[1::2]):
3638            if not search or not result:
3639                return None
3640
3641            if isinstance(search, exp.Literal):
3642                ifs.append(
3643                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
3644                )
3645            elif isinstance(search, exp.Null):
3646                ifs.append(
3647                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
3648                )
3649            else:
3650                cond = exp.or_(
3651                    exp.EQ(this=expression.copy(), expression=search),
3652                    exp.and_(
3653                        exp.Is(this=expression.copy(), expression=exp.Null()),
3654                        exp.Is(this=search.copy(), expression=exp.Null()),
3655                        copy=False,
3656                    ),
3657                    copy=False,
3658                )
3659                ifs.append(exp.If(this=cond, true=result))
3660
3661        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3662
3663    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3664        self._match_text_seq("KEY")
3665        key = self._parse_field()
3666        self._match(TokenType.COLON)
3667        self._match_text_seq("VALUE")
3668        value = self._parse_field()
3669        if not key and not value:
3670            return None
3671        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3672
3673    def _parse_json_object(self) -> exp.Expression:
3674        expressions = self._parse_csv(self._parse_json_key_value)
3675
3676        null_handling = None
3677        if self._match_text_seq("NULL", "ON", "NULL"):
3678            null_handling = "NULL ON NULL"
3679        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3680            null_handling = "ABSENT ON NULL"
3681
3682        unique_keys = None
3683        if self._match_text_seq("WITH", "UNIQUE"):
3684            unique_keys = True
3685        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3686            unique_keys = False
3687
3688        self._match_text_seq("KEYS")
3689
3690        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3691        format_json = self._match_text_seq("FORMAT", "JSON")
3692        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3693
3694        return self.expression(
3695            exp.JSONObject,
3696            expressions=expressions,
3697            null_handling=null_handling,
3698            unique_keys=unique_keys,
3699            return_type=return_type,
3700            format_json=format_json,
3701            encoding=encoding,
3702        )
3703
3704    def _parse_logarithm(self) -> exp.Expression:
3705        # Default argument order is base, expression
3706        args = self._parse_csv(self._parse_range)
3707
3708        if len(args) > 1:
3709            if not self.LOG_BASE_FIRST:
3710                args.reverse()
3711            return exp.Log.from_arg_list(args)
3712
3713        return self.expression(
3714            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3715        )
3716
    def _parse_match_against(self) -> exp.Expression:
        """Parse MATCH (col, ...) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        # Consume the `) AGAINST (` separator between columns and expression.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        # Optional search modifier. Order matters: NATURAL LANGUAGE MODE may
        # additionally be extended by WITH QUERY EXPANSION.
        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3738
3739    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3740    def _parse_open_json(self) -> exp.Expression:
3741        this = self._parse_bitwise()
3742        path = self._match(TokenType.COMMA) and self._parse_string()
3743
3744        def _parse_open_json_column_def() -> exp.Expression:
3745            this = self._parse_field(any_token=True)
3746            kind = self._parse_types()
3747            path = self._parse_string()
3748            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3749            return self.expression(
3750                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3751            )
3752
3753        expressions = None
3754        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3755            self._match_l_paren()
3756            expressions = self._parse_csv(_parse_open_json_column_def)
3757
3758        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3759
3760    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3761        args = self._parse_csv(self._parse_bitwise)
3762
3763        if self._match(TokenType.IN):
3764            return self.expression(
3765                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3766            )
3767
3768        if haystack_first:
3769            haystack = seq_get(args, 0)
3770            needle = seq_get(args, 1)
3771        else:
3772            needle = seq_get(args, 0)
3773            haystack = seq_get(args, 1)
3774
3775        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3776
3777        self.validate_expression(this, args)
3778
3779        return this
3780
3781    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3782        args = self._parse_csv(self._parse_table)
3783        return exp.JoinHint(this=func_name.upper(), expressions=args)
3784
3785    def _parse_substring(self) -> exp.Expression:
3786        # Postgres supports the form: substring(string [from int] [for int])
3787        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3788
3789        args = self._parse_csv(self._parse_bitwise)
3790
3791        if self._match(TokenType.FROM):
3792            args.append(self._parse_bitwise())
3793            if self._match(TokenType.FOR):
3794                args.append(self._parse_bitwise())
3795
3796        this = exp.Substring.from_arg_list(args)
3797        self.validate_expression(this, args)
3798
3799        return this
3800
3801    def _parse_trim(self) -> exp.Expression:
3802        # https://www.w3resource.com/sql/character-functions/trim.php
3803        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3804
3805        position = None
3806        collation = None
3807
3808        if self._match_texts(self.TRIM_TYPES):
3809            position = self._prev.text.upper()
3810
3811        expression = self._parse_bitwise()
3812        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3813            this = self._parse_bitwise()
3814        else:
3815            this = expression
3816            expression = None
3817
3818        if self._match(TokenType.COLLATE):
3819            collation = self._parse_bitwise()
3820
3821        return self.expression(
3822            exp.Trim,
3823            this=this,
3824            position=position,
3825            expression=expression,
3826            collation=collation,
3827        )
3828
    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Parse `WINDOW <name> AS (...), ...`. NOTE(review): when the WINDOW
        # keyword is absent this returns False (falsy), not None as the
        # annotation suggests -- callers should only rely on truthiness.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3831
3832    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3833        return self._parse_window(self._parse_id_var(), alias=True)
3834
3835    def _parse_respect_or_ignore_nulls(
3836        self, this: t.Optional[exp.Expression]
3837    ) -> t.Optional[exp.Expression]:
3838        if self._match_text_seq("IGNORE", "NULLS"):
3839            return self.expression(exp.IgnoreNulls, this=this)
3840        if self._match_text_seq("RESPECT", "NULLS"):
3841            return self.expression(exp.RespectNulls, this=this)
3842        return this
3843
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function modifiers that may follow a function call:
        FILTER (...), WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (...).

        Args:
            this: the expression the window attaches to.
            alias: parse a named window definition (`name AS (...)`) instead of
                requiring an OVER-style keyword before the parenthesis.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form -- no parenthesized specification.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Optional FIRST/LAST modifier; LAST overrides a preceding FIRST.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: <ROWS|RANGE> [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3926
3927    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3928        self._match(TokenType.BETWEEN)
3929
3930        return {
3931            "value": (
3932                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3933                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
3934                or self._parse_bitwise()
3935            ),
3936            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
3937        }
3938
3939    def _parse_alias(
3940        self, this: t.Optional[exp.Expression], explicit: bool = False
3941    ) -> t.Optional[exp.Expression]:
3942        any_token = self._match(TokenType.ALIAS)
3943
3944        if explicit and not any_token:
3945            return this
3946
3947        if self._match(TokenType.L_PAREN):
3948            aliases = self.expression(
3949                exp.Aliases,
3950                this=this,
3951                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3952            )
3953            self._match_r_paren(aliases)
3954            return aliases
3955
3956        alias = self._parse_id_var(any_token)
3957
3958        if alias:
3959            return self.expression(exp.Alias, this=this, alias=alias)
3960
3961        return this
3962
3963    def _parse_id_var(
3964        self,
3965        any_token: bool = True,
3966        tokens: t.Optional[t.Collection[TokenType]] = None,
3967        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3968    ) -> t.Optional[exp.Expression]:
3969        identifier = self._parse_identifier()
3970
3971        if identifier:
3972            return identifier
3973
3974        prefix = ""
3975
3976        if prefix_tokens:
3977            while self._match_set(prefix_tokens):
3978                prefix += self._prev.text
3979
3980        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3981            quoted = self._prev.token_type == TokenType.STRING
3982            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3983
3984        return None
3985
3986    def _parse_string(self) -> t.Optional[exp.Expression]:
3987        if self._match(TokenType.STRING):
3988            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3989        return self._parse_placeholder()
3990
3991    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3992        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3993
3994    def _parse_number(self) -> t.Optional[exp.Expression]:
3995        if self._match(TokenType.NUMBER):
3996            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3997        return self._parse_placeholder()
3998
3999    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4000        if self._match(TokenType.IDENTIFIER):
4001            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4002        return self._parse_placeholder()
4003
4004    def _parse_var(
4005        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4006    ) -> t.Optional[exp.Expression]:
4007        if (
4008            (any_token and self._advance_any())
4009            or self._match(TokenType.VAR)
4010            or (self._match_set(tokens) if tokens else False)
4011        ):
4012            return self.expression(exp.Var, this=self._prev.text)
4013        return self._parse_placeholder()
4014
4015    def _advance_any(self) -> t.Optional[Token]:
4016        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4017            self._advance()
4018            return self._prev
4019        return None
4020
4021    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4022        return self._parse_var() or self._parse_string()
4023
4024    def _parse_null(self) -> t.Optional[exp.Expression]:
4025        if self._match(TokenType.NULL):
4026            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4027        return None
4028
4029    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4030        if self._match(TokenType.TRUE):
4031            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4032        if self._match(TokenType.FALSE):
4033            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4034        return None
4035
4036    def _parse_star(self) -> t.Optional[exp.Expression]:
4037        if self._match(TokenType.STAR):
4038            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4039        return None
4040
4041    def _parse_parameter(self) -> exp.Expression:
4042        wrapped = self._match(TokenType.L_BRACE)
4043        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4044        self._match(TokenType.R_BRACE)
4045        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4046
4047    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4048        if self._match_set(self.PLACEHOLDER_PARSERS):
4049            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4050            if placeholder:
4051                return placeholder
4052            self._advance(-1)
4053        return None
4054
4055    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4056        if not self._match(TokenType.EXCEPT):
4057            return None
4058        if self._match(TokenType.L_PAREN, advance=False):
4059            return self._parse_wrapped_csv(self._parse_column)
4060        return self._parse_csv(self._parse_column)
4061
4062    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4063        if not self._match(TokenType.REPLACE):
4064            return None
4065        if self._match(TokenType.L_PAREN, advance=False):
4066            return self._parse_wrapped_csv(self._parse_expression)
4067        return self._parse_csv(self._parse_expression)
4068
4069    def _parse_csv(
4070        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4071    ) -> t.List[t.Optional[exp.Expression]]:
4072        parse_result = parse_method()
4073        items = [parse_result] if parse_result is not None else []
4074
4075        while self._match(sep):
4076            self._add_comments(parse_result)
4077            parse_result = parse_method()
4078            if parse_result is not None:
4079                items.append(parse_result)
4080
4081        return items
4082
4083    def _parse_tokens(
4084        self, parse_method: t.Callable, expressions: t.Dict
4085    ) -> t.Optional[exp.Expression]:
4086        this = parse_method()
4087
4088        while self._match_set(expressions):
4089            this = self.expression(
4090                expressions[self._prev.token_type],
4091                this=this,
4092                comments=self._prev_comments,
4093                expression=parse_method(),
4094            )
4095
4096        return this
4097
4098    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4099        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4100
4101    def _parse_wrapped_csv(
4102        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4103    ) -> t.List[t.Optional[exp.Expression]]:
4104        return self._parse_wrapped(
4105            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4106        )
4107
4108    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4109        wrapped = self._match(TokenType.L_PAREN)
4110        if not wrapped and not optional:
4111            self.raise_error("Expecting (")
4112        parse_result = parse_method()
4113        if wrapped:
4114            self._match_r_paren()
4115        return parse_result
4116
4117    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4118        return self._parse_select() or self._parse_set_operations(
4119            self._parse_expression() if alias else self._parse_conjunction()
4120        )
4121
4122    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4123        return self._parse_set_operations(
4124            self._parse_select(nested=True, parse_subquery_alias=False)
4125        )
4126
4127    def _parse_transaction(self) -> exp.Expression:
4128        this = None
4129        if self._match_texts(self.TRANSACTION_KIND):
4130            this = self._prev.text
4131
4132        self._match_texts({"TRANSACTION", "WORK"})
4133
4134        modes = []
4135        while True:
4136            mode = []
4137            while self._match(TokenType.VAR):
4138                mode.append(self._prev.text)
4139
4140            if mode:
4141                modes.append(" ".join(mode))
4142            if not self._match(TokenType.COMMA):
4143                break
4144
4145        return self.expression(exp.Transaction, this=this, modes=modes)
4146
4147    def _parse_commit_or_rollback(self) -> exp.Expression:
4148        chain = None
4149        savepoint = None
4150        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4151
4152        self._match_texts({"TRANSACTION", "WORK"})
4153
4154        if self._match_text_seq("TO"):
4155            self._match_text_seq("SAVEPOINT")
4156            savepoint = self._parse_id_var()
4157
4158        if self._match(TokenType.AND):
4159            chain = not self._match_text_seq("NO")
4160            self._match_text_seq("CHAIN")
4161
4162        if is_rollback:
4163            return self.expression(exp.Rollback, savepoint=savepoint)
4164        return self.expression(exp.Commit, chain=chain)
4165
4166    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4167        if not self._match_text_seq("ADD"):
4168            return None
4169
4170        self._match(TokenType.COLUMN)
4171        exists_column = self._parse_exists(not_=True)
4172        expression = self._parse_column_def(self._parse_field(any_token=True))
4173
4174        if expression:
4175            expression.set("exists", exists_column)
4176
4177            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4178            if self._match_texts(("FIRST", "AFTER")):
4179                position = self._prev.text
4180                column_position = self.expression(
4181                    exp.ColumnPosition, this=self._parse_column(), position=position
4182                )
4183                expression.set("position", column_position)
4184
4185        return expression
4186
4187    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4188        drop = self._match(TokenType.DROP) and self._parse_drop()
4189        if drop and not isinstance(drop, exp.Command):
4190            drop.set("kind", drop.args.get("kind", "COLUMN"))
4191        return drop
4192
4193    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4194    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4195        return self.expression(
4196            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4197        )
4198
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        Assumes the introducing token (CONSTRAINT, FOREIGN KEY or PRIMARY KEY)
        has already been consumed, so its type is read from `self._prev`.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # The key kind may be the already-consumed token, or may follow a
        # CONSTRAINT name; `or self._match(...)` handles the second case.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4222
4223    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4224        index = self._index - 1
4225
4226        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4227            return self._parse_csv(self._parse_add_constraint)
4228
4229        self._retreat(index)
4230        return self._parse_csv(self._parse_add_column)
4231
4232    def _parse_alter_table_alter(self) -> exp.Expression:
4233        self._match(TokenType.COLUMN)
4234        column = self._parse_field(any_token=True)
4235
4236        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4237            return self.expression(exp.AlterColumn, this=column, drop=True)
4238        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4239            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4240
4241        self._match_text_seq("SET", "DATA")
4242        return self.expression(
4243            exp.AlterColumn,
4244            this=column,
4245            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4246            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4247            using=self._match(TokenType.USING) and self._parse_conjunction(),
4248        )
4249
4250    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4251        index = self._index - 1
4252
4253        partition_exists = self._parse_exists()
4254        if self._match(TokenType.PARTITION, advance=False):
4255            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4256
4257        self._retreat(index)
4258        return self._parse_csv(self._parse_drop_column)
4259
4260    def _parse_alter_table_rename(self) -> exp.Expression:
4261        self._match_text_seq("TO")
4262        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4263
4264    def _parse_alter(self) -> t.Optional[exp.Expression]:
4265        start = self._prev
4266
4267        if not self._match(TokenType.TABLE):
4268            return self._parse_as_command(start)
4269
4270        exists = self._parse_exists()
4271        this = self._parse_table(schema=True)
4272
4273        if self._next:
4274            self._advance()
4275        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4276
4277        if parser:
4278            actions = ensure_list(parser(self))
4279
4280            if not self._curr:
4281                return self.expression(
4282                    exp.AlterTable,
4283                    this=this,
4284                    exists=exists,
4285                    actions=actions,
4286                )
4287        return self._parse_as_command(start)
4288
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement into an exp.Merge node.

        Handles MERGE INTO <target> USING <source> ON <cond> followed by any
        number of WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND <cond>] THEN
        INSERT/UPDATE/DELETE clauses.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN MATCHED vs WHEN NOT MATCHED.
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and the falsy
            # result of the failed match when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (vals).
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE is recorded as a bare Var carrying the keyword text.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4354
4355    def _parse_show(self) -> t.Optional[exp.Expression]:
4356        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4357        if parser:
4358            return parser(self)
4359        self._advance()
4360        return self.expression(exp.Show, this=self._prev.text.upper())
4361
4362    def _parse_set_item_assignment(
4363        self, kind: t.Optional[str] = None
4364    ) -> t.Optional[exp.Expression]:
4365        index = self._index
4366
4367        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4368            return self._parse_set_transaction(global_=kind == "GLOBAL")
4369
4370        left = self._parse_primary() or self._parse_id_var()
4371
4372        if not self._match_texts(("=", "TO")):
4373            self._retreat(index)
4374            return None
4375
4376        right = self._parse_statement() or self._parse_id_var()
4377        this = self.expression(
4378            exp.EQ,
4379            this=left,
4380            expression=right,
4381        )
4382
4383        return self.expression(
4384            exp.SetItem,
4385            this=this,
4386            kind=kind,
4387        )
4388
4389    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4390        self._match_text_seq("TRANSACTION")
4391        characteristics = self._parse_csv(
4392            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4393        )
4394        return self.expression(
4395            exp.SetItem,
4396            expressions=characteristics,
4397            kind="TRANSACTION",
4398            **{"global": global_},  # type: ignore
4399        )
4400
4401    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4402        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4403        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4404
4405    def _parse_set(self) -> exp.Expression:
4406        index = self._index
4407        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4408
4409        if self._curr:
4410            self._retreat(index)
4411            return self._parse_as_command(self._prev)
4412
4413        return set_
4414
4415    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4416        for option in options:
4417            if self._match_text_seq(*option.split(" ")):
4418                return exp.Var(this=option)
4419        return None
4420
4421    def _parse_as_command(self, start: Token) -> exp.Command:
4422        while self._curr:
4423            self._advance()
4424        text = self._find_sql(start, self._prev)
4425        size = len(start.text)
4426        return exp.Command(this=text[:size], expression=text[size:])
4427
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming token texts to find a registered parser.

        Returns the matching callable from `parsers`, or None (after rewinding
        the token stream) when no key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No trie entry continues with this word: give up.
                break
            if result == 2:
                # A complete key matched: look up its parser.
                subparser = parsers[" ".join(this)]
                return subparser
        # Rewind everything consumed during the failed lookup.
        self._retreat(index)
        return None
4450
4451    def _match(self, token_type, advance=True, expression=None):
4452        if not self._curr:
4453            return None
4454
4455        if self._curr.token_type == token_type:
4456            if advance:
4457                self._advance()
4458            self._add_comments(expression)
4459            return True
4460
4461        return None
4462
4463    def _match_set(self, types, advance=True):
4464        if not self._curr:
4465            return None
4466
4467        if self._curr.token_type in types:
4468            if advance:
4469                self._advance()
4470            return True
4471
4472        return None
4473
4474    def _match_pair(self, token_type_a, token_type_b, advance=True):
4475        if not self._curr or not self._next:
4476            return None
4477
4478        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4479            if advance:
4480                self._advance(2)
4481            return True
4482
4483        return None
4484
4485    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4486        if not self._match(TokenType.L_PAREN, expression=expression):
4487            self.raise_error("Expecting (")
4488
4489    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4490        if not self._match(TokenType.R_PAREN, expression=expression):
4491            self.raise_error("Expecting )")
4492
4493    def _match_texts(self, texts, advance=True):
4494        if self._curr and self._curr.text.upper() in texts:
4495            if advance:
4496                self._advance()
4497            return True
4498        return False
4499
4500    def _match_text_seq(self, *texts, advance=True):
4501        index = self._index
4502        for text in texts:
4503            if self._curr and self._curr.text.upper() == text:
4504                self._advance()
4505            else:
4506                self._retreat(index)
4507                return False
4508
4509        if not advance:
4510            self._retreat(index)
4511
4512        return True
4513
4514    def _replace_columns_with_dots(
4515        self, this: t.Optional[exp.Expression]
4516    ) -> t.Optional[exp.Expression]:
4517        if isinstance(this, exp.Dot):
4518            exp.replace_children(this, self._replace_columns_with_dots)
4519        elif isinstance(this, exp.Column):
4520            exp.replace_children(this, self._replace_columns_with_dots)
4521            table = this.args.get("table")
4522            this = (
4523                self.expression(exp.Dot, this=table, expression=this.this)
4524                if table
4525                else self.expression(exp.Var, this=this.name)
4526            )
4527        elif isinstance(this, exp.Identifier):
4528            this = self.expression(exp.Var, this=this.name)
4529
4530        return this
4531
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters inside `node`.

        Columns whose first part names a lambda variable are rewritten in place
        to a Dot (qualified) or bare identifier, so they resolve as lambda
        arguments rather than table columns. Returns the (possibly replaced)
        root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot chain containing this column and
                # replace that whole chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column itself (or the root,
                    # when the column IS the root node).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: List) -> sqlglot.expressions.Expression:
20def parse_var_map(args: t.List) -> exp.Expression:
21    if len(args) == 1 and args[0].is_star:
22        return exp.StarMap(this=args[0])
23
24    keys = []
25    values = []
26    for i in range(0, len(args), 2):
27        keys.append(args[i])
28        values.append(args[i + 1])
29    return exp.VarMap(
30        keys=exp.Array(expressions=keys),
31        values=exp.Array(expressions=values),
32    )
def parse_like(args: List) -> sqlglot.expressions.Expression:
35def parse_like(args: t.List) -> exp.Expression:
36    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
37    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
40def binary_range_parser(
41    expr_type: t.Type[exp.Expression],
42) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
43    return lambda self, this: self._parse_escape(
44        self.expression(expr_type, this=this, expression=self._parse_bitwise())
45    )
class Parser:
  57class Parser(metaclass=_Parser):
  58    """
  59    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  60    a parsed syntax tree.
  61
  62    Args:
  63        error_level: the desired error level.
  64            Default: ErrorLevel.RAISE
  65        error_message_context: determines the amount of context to capture from a
  66            query string when displaying the error message (in number of characters).
  67            Default: 50.
  68        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  69            Default: 0
  70        alias_post_tablesample: If the table alias comes after tablesample.
  71            Default: False
  72        max_errors: Maximum number of error messages to include in a raised ParseError.
  73            This is only relevant if error_level is ErrorLevel.RAISE.
  74            Default: 3
  75        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  76            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  77            Default: "nulls_are_small"
  78    """
  79
  80    FUNCTIONS: t.Dict[str, t.Callable] = {
  81        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  82        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  83            this=seq_get(args, 0),
  84            to=exp.DataType(this=exp.DataType.Type.TEXT),
  85        ),
  86        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  87        "IFNULL": exp.Coalesce.from_arg_list,
  88        "LIKE": parse_like,
  89        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  90            this=seq_get(args, 0),
  91            to=exp.DataType(this=exp.DataType.Type.TEXT),
  92        ),
  93        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  94            this=exp.Cast(
  95                this=seq_get(args, 0),
  96                to=exp.DataType(this=exp.DataType.Type.TEXT),
  97            ),
  98            start=exp.Literal.number(1),
  99            length=exp.Literal.number(10),
 100        ),
 101        "VAR_MAP": parse_var_map,
 102    }
 103
 104    NO_PAREN_FUNCTIONS = {
 105        TokenType.CURRENT_DATE: exp.CurrentDate,
 106        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 107        TokenType.CURRENT_TIME: exp.CurrentTime,
 108        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 109        TokenType.CURRENT_USER: exp.CurrentUser,
 110    }
 111
 112    JOIN_HINTS: t.Set[str] = set()
 113
 114    NESTED_TYPE_TOKENS = {
 115        TokenType.ARRAY,
 116        TokenType.MAP,
 117        TokenType.NULLABLE,
 118        TokenType.STRUCT,
 119    }
 120
 121    TYPE_TOKENS = {
 122        TokenType.BIT,
 123        TokenType.BOOLEAN,
 124        TokenType.TINYINT,
 125        TokenType.UTINYINT,
 126        TokenType.SMALLINT,
 127        TokenType.USMALLINT,
 128        TokenType.INT,
 129        TokenType.UINT,
 130        TokenType.BIGINT,
 131        TokenType.UBIGINT,
 132        TokenType.INT128,
 133        TokenType.UINT128,
 134        TokenType.INT256,
 135        TokenType.UINT256,
 136        TokenType.FLOAT,
 137        TokenType.DOUBLE,
 138        TokenType.CHAR,
 139        TokenType.NCHAR,
 140        TokenType.VARCHAR,
 141        TokenType.NVARCHAR,
 142        TokenType.TEXT,
 143        TokenType.MEDIUMTEXT,
 144        TokenType.LONGTEXT,
 145        TokenType.MEDIUMBLOB,
 146        TokenType.LONGBLOB,
 147        TokenType.BINARY,
 148        TokenType.VARBINARY,
 149        TokenType.JSON,
 150        TokenType.JSONB,
 151        TokenType.INTERVAL,
 152        TokenType.TIME,
 153        TokenType.TIMESTAMP,
 154        TokenType.TIMESTAMPTZ,
 155        TokenType.TIMESTAMPLTZ,
 156        TokenType.DATETIME,
 157        TokenType.DATETIME64,
 158        TokenType.DATE,
 159        TokenType.DECIMAL,
 160        TokenType.BIGDECIMAL,
 161        TokenType.UUID,
 162        TokenType.GEOGRAPHY,
 163        TokenType.GEOMETRY,
 164        TokenType.HLLSKETCH,
 165        TokenType.HSTORE,
 166        TokenType.PSEUDO_TYPE,
 167        TokenType.SUPER,
 168        TokenType.SERIAL,
 169        TokenType.SMALLSERIAL,
 170        TokenType.BIGSERIAL,
 171        TokenType.XML,
 172        TokenType.UNIQUEIDENTIFIER,
 173        TokenType.MONEY,
 174        TokenType.SMALLMONEY,
 175        TokenType.ROWVERSION,
 176        TokenType.IMAGE,
 177        TokenType.VARIANT,
 178        TokenType.OBJECT,
 179        TokenType.INET,
 180        *NESTED_TYPE_TOKENS,
 181    }
 182
 183    SUBQUERY_PREDICATES = {
 184        TokenType.ANY: exp.Any,
 185        TokenType.ALL: exp.All,
 186        TokenType.EXISTS: exp.Exists,
 187        TokenType.SOME: exp.Any,
 188    }
 189
 190    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 191
 192    DB_CREATABLES = {
 193        TokenType.DATABASE,
 194        TokenType.SCHEMA,
 195        TokenType.TABLE,
 196        TokenType.VIEW,
 197    }
 198
 199    CREATABLES = {
 200        TokenType.COLUMN,
 201        TokenType.FUNCTION,
 202        TokenType.INDEX,
 203        TokenType.PROCEDURE,
 204        *DB_CREATABLES,
 205    }
 206
 207    ID_VAR_TOKENS = {
 208        TokenType.VAR,
 209        TokenType.ANTI,
 210        TokenType.APPLY,
 211        TokenType.ASC,
 212        TokenType.AUTO_INCREMENT,
 213        TokenType.BEGIN,
 214        TokenType.CACHE,
 215        TokenType.COLLATE,
 216        TokenType.COMMAND,
 217        TokenType.COMMENT,
 218        TokenType.COMMIT,
 219        TokenType.CONSTRAINT,
 220        TokenType.DEFAULT,
 221        TokenType.DELETE,
 222        TokenType.DESC,
 223        TokenType.DESCRIBE,
 224        TokenType.DIV,
 225        TokenType.END,
 226        TokenType.EXECUTE,
 227        TokenType.ESCAPE,
 228        TokenType.FALSE,
 229        TokenType.FIRST,
 230        TokenType.FILTER,
 231        TokenType.FORMAT,
 232        TokenType.FULL,
 233        TokenType.IF,
 234        TokenType.IS,
 235        TokenType.ISNULL,
 236        TokenType.INTERVAL,
 237        TokenType.KEEP,
 238        TokenType.LEFT,
 239        TokenType.LOAD,
 240        TokenType.MERGE,
 241        TokenType.NATURAL,
 242        TokenType.NEXT,
 243        TokenType.OFFSET,
 244        TokenType.ORDINALITY,
 245        TokenType.OVERWRITE,
 246        TokenType.PARTITION,
 247        TokenType.PERCENT,
 248        TokenType.PIVOT,
 249        TokenType.PRAGMA,
 250        TokenType.RANGE,
 251        TokenType.REFERENCES,
 252        TokenType.RIGHT,
 253        TokenType.ROW,
 254        TokenType.ROWS,
 255        TokenType.SEMI,
 256        TokenType.SET,
 257        TokenType.SETTINGS,
 258        TokenType.SHOW,
 259        TokenType.TEMPORARY,
 260        TokenType.TOP,
 261        TokenType.TRUE,
 262        TokenType.UNIQUE,
 263        TokenType.UNPIVOT,
 264        TokenType.VOLATILE,
 265        TokenType.WINDOW,
 266        *CREATABLES,
 267        *SUBQUERY_PREDICATES,
 268        *TYPE_TOKENS,
 269        *NO_PAREN_FUNCTIONS,
 270    }
 271
 272    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 273
 274    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 275        TokenType.APPLY,
 276        TokenType.FULL,
 277        TokenType.LEFT,
 278        TokenType.LOCK,
 279        TokenType.NATURAL,
 280        TokenType.OFFSET,
 281        TokenType.RIGHT,
 282        TokenType.WINDOW,
 283    }
 284
 285    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 286
 287    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 288
 289    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 290
 291    FUNC_TOKENS = {
 292        TokenType.COMMAND,
 293        TokenType.CURRENT_DATE,
 294        TokenType.CURRENT_DATETIME,
 295        TokenType.CURRENT_TIMESTAMP,
 296        TokenType.CURRENT_TIME,
 297        TokenType.CURRENT_USER,
 298        TokenType.FILTER,
 299        TokenType.FIRST,
 300        TokenType.FORMAT,
 301        TokenType.GLOB,
 302        TokenType.IDENTIFIER,
 303        TokenType.INDEX,
 304        TokenType.ISNULL,
 305        TokenType.ILIKE,
 306        TokenType.LIKE,
 307        TokenType.MERGE,
 308        TokenType.OFFSET,
 309        TokenType.PRIMARY_KEY,
 310        TokenType.RANGE,
 311        TokenType.REPLACE,
 312        TokenType.ROW,
 313        TokenType.UNNEST,
 314        TokenType.VAR,
 315        TokenType.LEFT,
 316        TokenType.RIGHT,
 317        TokenType.DATE,
 318        TokenType.DATETIME,
 319        TokenType.TABLE,
 320        TokenType.TIMESTAMP,
 321        TokenType.TIMESTAMPTZ,
 322        TokenType.WINDOW,
 323        *TYPE_TOKENS,
 324        *SUBQUERY_PREDICATES,
 325    }
 326
 327    CONJUNCTION = {
 328        TokenType.AND: exp.And,
 329        TokenType.OR: exp.Or,
 330    }
 331
 332    EQUALITY = {
 333        TokenType.EQ: exp.EQ,
 334        TokenType.NEQ: exp.NEQ,
 335        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 336    }
 337
 338    COMPARISON = {
 339        TokenType.GT: exp.GT,
 340        TokenType.GTE: exp.GTE,
 341        TokenType.LT: exp.LT,
 342        TokenType.LTE: exp.LTE,
 343    }
 344
 345    BITWISE = {
 346        TokenType.AMP: exp.BitwiseAnd,
 347        TokenType.CARET: exp.BitwiseXor,
 348        TokenType.PIPE: exp.BitwiseOr,
 349        TokenType.DPIPE: exp.DPipe,
 350    }
 351
 352    TERM = {
 353        TokenType.DASH: exp.Sub,
 354        TokenType.PLUS: exp.Add,
 355        TokenType.MOD: exp.Mod,
 356        TokenType.COLLATE: exp.Collate,
 357    }
 358
 359    FACTOR = {
 360        TokenType.DIV: exp.IntDiv,
 361        TokenType.LR_ARROW: exp.Distance,
 362        TokenType.SLASH: exp.Div,
 363        TokenType.STAR: exp.Mul,
 364    }
 365
 366    TIMESTAMPS = {
 367        TokenType.TIME,
 368        TokenType.TIMESTAMP,
 369        TokenType.TIMESTAMPTZ,
 370        TokenType.TIMESTAMPLTZ,
 371    }
 372
 373    SET_OPERATIONS = {
 374        TokenType.UNION,
 375        TokenType.INTERSECT,
 376        TokenType.EXCEPT,
 377    }
 378
 379    JOIN_SIDES = {
 380        TokenType.LEFT,
 381        TokenType.RIGHT,
 382        TokenType.FULL,
 383    }
 384
 385    JOIN_KINDS = {
 386        TokenType.INNER,
 387        TokenType.OUTER,
 388        TokenType.CROSS,
 389        TokenType.SEMI,
 390        TokenType.ANTI,
 391    }
 392
 393    LAMBDAS = {
 394        TokenType.ARROW: lambda self, expressions: self.expression(
 395            exp.Lambda,
 396            this=self._replace_lambda(
 397                self._parse_conjunction(),
 398                {node.name for node in expressions},
 399            ),
 400            expressions=expressions,
 401        ),
 402        TokenType.FARROW: lambda self, expressions: self.expression(
 403            exp.Kwarg,
 404            this=exp.Var(this=expressions[0].name),
 405            expression=self._parse_conjunction(),
 406        ),
 407    }
 408
 409    COLUMN_OPERATORS = {
 410        TokenType.DOT: None,
 411        TokenType.DCOLON: lambda self, this, to: self.expression(
 412            exp.Cast if self.STRICT_CAST else exp.TryCast,
 413            this=this,
 414            to=to,
 415        ),
 416        TokenType.ARROW: lambda self, this, path: self.expression(
 417            exp.JSONExtract,
 418            this=this,
 419            expression=path,
 420        ),
 421        TokenType.DARROW: lambda self, this, path: self.expression(
 422            exp.JSONExtractScalar,
 423            this=this,
 424            expression=path,
 425        ),
 426        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 427            exp.JSONBExtract,
 428            this=this,
 429            expression=path,
 430        ),
 431        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 432            exp.JSONBExtractScalar,
 433            this=this,
 434            expression=path,
 435        ),
 436        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 437            exp.JSONBContains,
 438            this=this,
 439            expression=key,
 440        ),
 441    }
 442
 443    EXPRESSION_PARSERS = {
 444        exp.Column: lambda self: self._parse_column(),
 445        exp.DataType: lambda self: self._parse_types(),
 446        exp.From: lambda self: self._parse_from(),
 447        exp.Group: lambda self: self._parse_group(),
 448        exp.Identifier: lambda self: self._parse_id_var(),
 449        exp.Lateral: lambda self: self._parse_lateral(),
 450        exp.Join: lambda self: self._parse_join(),
 451        exp.Order: lambda self: self._parse_order(),
 452        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 453        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 454        exp.Lambda: lambda self: self._parse_lambda(),
 455        exp.Limit: lambda self: self._parse_limit(),
 456        exp.Offset: lambda self: self._parse_offset(),
 457        exp.TableAlias: lambda self: self._parse_table_alias(),
 458        exp.Table: lambda self: self._parse_table_parts(),
 459        exp.Condition: lambda self: self._parse_conjunction(),
 460        exp.Expression: lambda self: self._parse_statement(),
 461        exp.Properties: lambda self: self._parse_properties(),
 462        exp.Where: lambda self: self._parse_where(),
 463        exp.Ordered: lambda self: self._parse_ordered(),
 464        exp.Having: lambda self: self._parse_having(),
 465        exp.With: lambda self: self._parse_with(),
 466        exp.Window: lambda self: self._parse_named_window(),
 467        exp.Qualify: lambda self: self._parse_qualify(),
 468        exp.Returning: lambda self: self._parse_returning(),
 469        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 470    }
 471
 472    STATEMENT_PARSERS = {
 473        TokenType.ALTER: lambda self: self._parse_alter(),
 474        TokenType.BEGIN: lambda self: self._parse_transaction(),
 475        TokenType.CACHE: lambda self: self._parse_cache(),
 476        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 477        TokenType.COMMENT: lambda self: self._parse_comment(),
 478        TokenType.CREATE: lambda self: self._parse_create(),
 479        TokenType.DELETE: lambda self: self._parse_delete(),
 480        TokenType.DESC: lambda self: self._parse_describe(),
 481        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 482        TokenType.DROP: lambda self: self._parse_drop(),
 483        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 484        TokenType.INSERT: lambda self: self._parse_insert(),
 485        TokenType.LOAD: lambda self: self._parse_load(),
 486        TokenType.MERGE: lambda self: self._parse_merge(),
 487        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 488        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 489        TokenType.SET: lambda self: self._parse_set(),
 490        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 491        TokenType.UPDATE: lambda self: self._parse_update(),
 492        TokenType.USE: lambda self: self.expression(
 493            exp.Use,
 494            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 495            and exp.Var(this=self._prev.text),
 496            this=self._parse_table(schema=False),
 497        ),
 498    }
 499
 500    UNARY_PARSERS = {
 501        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 502        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 503        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 504        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 505    }
 506
 507    PRIMARY_PARSERS = {
 508        TokenType.STRING: lambda self, token: self.expression(
 509            exp.Literal, this=token.text, is_string=True
 510        ),
 511        TokenType.NUMBER: lambda self, token: self.expression(
 512            exp.Literal, this=token.text, is_string=False
 513        ),
 514        TokenType.STAR: lambda self, _: self.expression(
 515            exp.Star,
 516            **{"except": self._parse_except(), "replace": self._parse_replace()},
 517        ),
 518        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 519        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 520        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 521        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 522        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 523        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 524        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 525        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 526            exp.National, this=token.text
 527        ),
 528        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 529        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 530    }
 531
 532    PLACEHOLDER_PARSERS = {
 533        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 534        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 535        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 536        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 537        else None,
 538    }
 539
 540    RANGE_PARSERS = {
 541        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 542        TokenType.GLOB: binary_range_parser(exp.Glob),
 543        TokenType.ILIKE: binary_range_parser(exp.ILike),
 544        TokenType.IN: lambda self, this: self._parse_in(this),
 545        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 546        TokenType.IS: lambda self, this: self._parse_is(this),
 547        TokenType.LIKE: binary_range_parser(exp.Like),
 548        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 549        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 550        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 551    }
 552
 553    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 554        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 555        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 556        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 557        "CHARACTER SET": lambda self: self._parse_character_set(),
 558        "CHECKSUM": lambda self: self._parse_checksum(),
 559        "CLUSTER": lambda self: self._parse_cluster(),
 560        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 561        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 562        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 563        "DEFINER": lambda self: self._parse_definer(),
 564        "DETERMINISTIC": lambda self: self.expression(
 565            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 566        ),
 567        "DISTKEY": lambda self: self._parse_distkey(),
 568        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 569        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 570        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 571        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 572        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 573        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 574        "FREESPACE": lambda self: self._parse_freespace(),
 575        "IMMUTABLE": lambda self: self.expression(
 576            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 577        ),
 578        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 579        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 580        "LIKE": lambda self: self._parse_create_like(),
 581        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 582        "LOCK": lambda self: self._parse_locking(),
 583        "LOCKING": lambda self: self._parse_locking(),
 584        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 585        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 586        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 587        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 588        "NO": lambda self: self._parse_no_property(),
 589        "ON": lambda self: self._parse_on_property(),
 590        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 591        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 592        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 593        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 594        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 595        "RETURNS": lambda self: self._parse_returns(),
 596        "ROW": lambda self: self._parse_row(),
 597        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 598        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 599        "SETTINGS": lambda self: self.expression(
 600            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 601        ),
 602        "SORTKEY": lambda self: self._parse_sortkey(),
 603        "STABLE": lambda self: self.expression(
 604            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 605        ),
 606        "STORED": lambda self: self._parse_stored(),
 607        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 608        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 609        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 610        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 611        "TTL": lambda self: self._parse_ttl(),
 612        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 613        "VOLATILE": lambda self: self._parse_volatile_property(),
 614        "WITH": lambda self: self._parse_with_property(),
 615    }
 616
 617    CONSTRAINT_PARSERS = {
 618        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 619        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 620        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 621        "CHARACTER SET": lambda self: self.expression(
 622            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 623        ),
 624        "CHECK": lambda self: self.expression(
 625            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 626        ),
 627        "COLLATE": lambda self: self.expression(
 628            exp.CollateColumnConstraint, this=self._parse_var()
 629        ),
 630        "COMMENT": lambda self: self.expression(
 631            exp.CommentColumnConstraint, this=self._parse_string()
 632        ),
 633        "COMPRESS": lambda self: self._parse_compress(),
 634        "DEFAULT": lambda self: self.expression(
 635            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 636        ),
 637        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 638        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 639        "FORMAT": lambda self: self.expression(
 640            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 641        ),
 642        "GENERATED": lambda self: self._parse_generated_as_identity(),
 643        "IDENTITY": lambda self: self._parse_auto_increment(),
 644        "INLINE": lambda self: self._parse_inline(),
 645        "LIKE": lambda self: self._parse_create_like(),
 646        "NOT": lambda self: self._parse_not_constraint(),
 647        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 648        "ON": lambda self: self._match(TokenType.UPDATE)
 649        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 650        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 651        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 652        "REFERENCES": lambda self: self._parse_references(match=False),
 653        "TITLE": lambda self: self.expression(
 654            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 655        ),
 656        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 657        "UNIQUE": lambda self: self._parse_unique(),
 658        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 659    }
 660
 661    ALTER_PARSERS = {
 662        "ADD": lambda self: self._parse_alter_table_add(),
 663        "ALTER": lambda self: self._parse_alter_table_alter(),
 664        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 665        "DROP": lambda self: self._parse_alter_table_drop(),
 666        "RENAME": lambda self: self._parse_alter_table_rename(),
 667    }
 668
 669    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 670
 671    NO_PAREN_FUNCTION_PARSERS = {
 672        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 673        TokenType.CASE: lambda self: self._parse_case(),
 674        TokenType.IF: lambda self: self._parse_if(),
 675        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 676            exp.NextValueFor,
 677            this=self._parse_column(),
 678            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 679        ),
 680    }
 681
 682    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 683
 684    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 685        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 686        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 687        "DECODE": lambda self: self._parse_decode(),
 688        "EXTRACT": lambda self: self._parse_extract(),
 689        "JSON_OBJECT": lambda self: self._parse_json_object(),
 690        "LOG": lambda self: self._parse_logarithm(),
 691        "MATCH": lambda self: self._parse_match_against(),
 692        "OPENJSON": lambda self: self._parse_open_json(),
 693        "POSITION": lambda self: self._parse_position(),
 694        "SAFE_CAST": lambda self: self._parse_cast(False),
 695        "STRING_AGG": lambda self: self._parse_string_agg(),
 696        "SUBSTRING": lambda self: self._parse_substring(),
 697        "TRIM": lambda self: self._parse_trim(),
 698        "TRY_CAST": lambda self: self._parse_cast(False),
 699        "TRY_CONVERT": lambda self: self._parse_convert(False),
 700    }
 701
 702    QUERY_MODIFIER_PARSERS = {
 703        "joins": lambda self: list(iter(self._parse_join, None)),
 704        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 705        "match": lambda self: self._parse_match_recognize(),
 706        "where": lambda self: self._parse_where(),
 707        "group": lambda self: self._parse_group(),
 708        "having": lambda self: self._parse_having(),
 709        "qualify": lambda self: self._parse_qualify(),
 710        "windows": lambda self: self._parse_window_clause(),
 711        "order": lambda self: self._parse_order(),
 712        "limit": lambda self: self._parse_limit(),
 713        "offset": lambda self: self._parse_offset(),
 714        "locks": lambda self: self._parse_locks(),
 715        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 716    }
 717
 718    SET_PARSERS = {
 719        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 720        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 721        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 722        "TRANSACTION": lambda self: self._parse_set_transaction(),
 723    }
 724
 725    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 726
 727    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 728
 729    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 730
 731    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 732
 733    TRANSACTION_CHARACTERISTICS = {
 734        "ISOLATION LEVEL REPEATABLE READ",
 735        "ISOLATION LEVEL READ COMMITTED",
 736        "ISOLATION LEVEL READ UNCOMMITTED",
 737        "ISOLATION LEVEL SERIALIZABLE",
 738        "READ WRITE",
 739        "READ ONLY",
 740    }
 741
 742    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 743
 744    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 745
 746    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 747    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 748
 749    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 750
 751    STRICT_CAST = True
 752
 753    CONVERT_TYPE_FIRST = False
 754
 755    PREFIXED_PIVOT_COLUMNS = False
 756    IDENTIFY_PIVOT_STRINGS = False
 757
 758    LOG_BASE_FIRST = True
 759    LOG_DEFAULTS_TO_LN = False
 760
 761    __slots__ = (
 762        "error_level",
 763        "error_message_context",
 764        "sql",
 765        "errors",
 766        "index_offset",
 767        "unnest_column_only",
 768        "alias_post_tablesample",
 769        "max_errors",
 770        "null_ordering",
 771        "_tokens",
 772        "_index",
 773        "_curr",
 774        "_next",
 775        "_prev",
 776        "_prev_comments",
 777        "_show_trie",
 778        "_set_trie",
 779    )
 780
 781    def __init__(
 782        self,
 783        error_level: t.Optional[ErrorLevel] = None,
 784        error_message_context: int = 100,
 785        index_offset: int = 0,
 786        unnest_column_only: bool = False,
 787        alias_post_tablesample: bool = False,
 788        max_errors: int = 3,
 789        null_ordering: t.Optional[str] = None,
 790    ):
 791        self.error_level = error_level or ErrorLevel.IMMEDIATE
 792        self.error_message_context = error_message_context
 793        self.index_offset = index_offset
 794        self.unnest_column_only = unnest_column_only
 795        self.alias_post_tablesample = alias_post_tablesample
 796        self.max_errors = max_errors
 797        self.null_ordering = null_ordering
 798        self.reset()
 799
 800    def reset(self):
 801        self.sql = ""
 802        self.errors = []
 803        self._tokens = []
 804        self._index = 0
 805        self._curr = None
 806        self._next = None
 807        self._prev = None
 808        self._prev_comments = None
 809
 810    def parse(
 811        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 812    ) -> t.List[t.Optional[exp.Expression]]:
 813        """
 814        Parses a list of tokens and returns a list of syntax trees, one tree
 815        per parsed SQL statement.
 816
 817        Args:
 818            raw_tokens: the list of tokens.
 819            sql: the original SQL string, used to produce helpful debug messages.
 820
 821        Returns:
 822            The list of syntax trees.
 823        """
 824        return self._parse(
 825            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 826        )
 827
 828    def parse_into(
 829        self,
 830        expression_types: exp.IntoType,
 831        raw_tokens: t.List[Token],
 832        sql: t.Optional[str] = None,
 833    ) -> t.List[t.Optional[exp.Expression]]:
 834        """
 835        Parses a list of tokens into a given Expression type. If a collection of Expression
 836        types is given instead, this method will try to parse the token list into each one
 837        of them, stopping at the first for which the parsing succeeds.
 838
 839        Args:
 840            expression_types: the expression type(s) to try and parse the token list into.
 841            raw_tokens: the list of tokens.
 842            sql: the original SQL string, used to produce helpful debug messages.
 843
 844        Returns:
 845            The target Expression.
 846        """
 847        errors = []
 848        for expression_type in ensure_collection(expression_types):
 849            parser = self.EXPRESSION_PARSERS.get(expression_type)
 850            if not parser:
 851                raise TypeError(f"No parser registered for {expression_type}")
 852            try:
 853                return self._parse(parser, raw_tokens, sql)
 854            except ParseError as e:
 855                e.errors[0]["into_expression"] = expression_type
 856                errors.append(e)
 857        raise ParseError(
 858            f"Failed to parse into {expression_types}",
 859            errors=merge_errors(errors),
 860        ) from errors[-1]
 861
 862    def _parse(
 863        self,
 864        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 865        raw_tokens: t.List[Token],
 866        sql: t.Optional[str] = None,
 867    ) -> t.List[t.Optional[exp.Expression]]:
 868        self.reset()
 869        self.sql = sql or ""
 870        total = len(raw_tokens)
 871        chunks: t.List[t.List[Token]] = [[]]
 872
 873        for i, token in enumerate(raw_tokens):
 874            if token.token_type == TokenType.SEMICOLON:
 875                if i < total - 1:
 876                    chunks.append([])
 877            else:
 878                chunks[-1].append(token)
 879
 880        expressions = []
 881
 882        for tokens in chunks:
 883            self._index = -1
 884            self._tokens = tokens
 885            self._advance()
 886
 887            expressions.append(parse_method(self))
 888
 889            if self._index < len(self._tokens):
 890                self.raise_error("Invalid expression / Unexpected token")
 891
 892            self.check_errors()
 893
 894        return expressions
 895
 896    def check_errors(self) -> None:
 897        """
 898        Logs or raises any found errors, depending on the chosen error level setting.
 899        """
 900        if self.error_level == ErrorLevel.WARN:
 901            for error in self.errors:
 902                logger.error(str(error))
 903        elif self.error_level == ErrorLevel.RAISE and self.errors:
 904            raise ParseError(
 905                concat_messages(self.errors, self.max_errors),
 906                errors=merge_errors(self.errors),
 907            )
 908
 909    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 910        """
 911        Appends an error in the list of recorded errors or raises it, depending on the chosen
 912        error level setting.
 913        """
 914        token = token or self._curr or self._prev or Token.string("")
 915        start = token.start
 916        end = token.end + 1
 917        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 918        highlight = self.sql[start:end]
 919        end_context = self.sql[end : end + self.error_message_context]
 920
 921        error = ParseError.new(
 922            f"{message}. Line {token.line}, Col: {token.col}.\n"
 923            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 924            description=message,
 925            line=token.line,
 926            col=token.col,
 927            start_context=start_context,
 928            highlight=highlight,
 929            end_context=end_context,
 930        )
 931
 932        if self.error_level == ErrorLevel.IMMEDIATE:
 933            raise error
 934
 935        self.errors.append(error)
 936
 937    def expression(
 938        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 939    ) -> E:
 940        """
 941        Creates a new, validated Expression.
 942
 943        Args:
 944            exp_class: the expression class to instantiate.
 945            comments: an optional list of comments to attach to the expression.
 946            kwargs: the arguments to set for the expression along with their respective values.
 947
 948        Returns:
 949            The target expression.
 950        """
 951        instance = exp_class(**kwargs)
 952        instance.add_comments(comments) if comments else self._add_comments(instance)
 953        self.validate_expression(instance)
 954        return instance
 955
 956    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 957        if expression and self._prev_comments:
 958            expression.add_comments(self._prev_comments)
 959            self._prev_comments = None
 960
 961    def validate_expression(
 962        self, expression: exp.Expression, args: t.Optional[t.List] = None
 963    ) -> None:
 964        """
 965        Validates an already instantiated expression, making sure that all its mandatory arguments
 966        are set.
 967
 968        Args:
 969            expression: the expression to validate.
 970            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 971        """
 972        if self.error_level == ErrorLevel.IGNORE:
 973            return
 974
 975        for error_message in expression.error_messages(args):
 976            self.raise_error(error_message)
 977
 978    def _find_sql(self, start: Token, end: Token) -> str:
 979        return self.sql[start.start : end.end + 1]
 980
 981    def _advance(self, times: int = 1) -> None:
 982        self._index += times
 983        self._curr = seq_get(self._tokens, self._index)
 984        self._next = seq_get(self._tokens, self._index + 1)
 985        if self._index > 0:
 986            self._prev = self._tokens[self._index - 1]
 987            self._prev_comments = self._prev.comments
 988        else:
 989            self._prev = None
 990            self._prev_comments = None
 991
 992    def _retreat(self, index: int) -> None:
 993        if index != self._index:
 994            self._advance(index - self._index)
 995
 996    def _parse_command(self) -> exp.Command:
 997        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 998
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parse a COMMENT ON <kind> <name> IS <string> statement.

        Falls back to parsing the statement as a raw Command when the target kind
        is not one of the known creatable kinds.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # The object kind (TABLE, COLUMN, FUNCTION, ...) determines how the name is parsed.
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1024
1025    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """
        Parse a MergeTree TTL clause into a MergeTreeTTL expression.

        Each comma-separated TTL entry is an expression optionally followed by an
        action (DELETE, RECOMPRESS <expr>, TO DISK '<name>', TO VOLUME '<name>'),
        and the whole clause may carry WHERE / GROUP BY (with SET aggregations).
        """
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A bare expression with no action keyword is returned unchanged.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET aggregations are only parsed after a GROUP BY clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1062
1063    def _parse_statement(self) -> t.Optional[exp.Expression]:
1064        if self._curr is None:
1065            return None
1066
1067        if self._match_set(self.STATEMENT_PARSERS):
1068            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1069
1070        if self._match_set(Tokenizer.COMMANDS):
1071            return self._parse_command()
1072
1073        expression = self._parse_expression()
1074        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1075        return self._parse_query_modifiers(expression)
1076
1077    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1078        start = self._prev
1079        temporary = self._match(TokenType.TEMPORARY)
1080        materialized = self._match_text_seq("MATERIALIZED")
1081        kind = self._match_set(self.CREATABLES) and self._prev.text
1082        if not kind:
1083            return self._parse_as_command(start)
1084
1085        return self.expression(
1086            exp.Drop,
1087            exists=self._parse_exists(),
1088            this=self._parse_table(schema=True),
1089            kind=kind,
1090            temporary=temporary,
1091            materialized=materialized,
1092            cascade=self._match_text_seq("CASCADE"),
1093            constraints=self._match_text_seq("CONSTRAINTS"),
1094            purge=self._match_text_seq("PURGE"),
1095        )
1096
1097    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1098        return (
1099            self._match(TokenType.IF)
1100            and (not not_ or self._match(TokenType.NOT))
1101            and self._match(TokenType.EXISTS)
1102        )
1103
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parse a CREATE statement (function/procedure, index, or a DB creatable such
        as a table or view).

        Properties may appear in several positions (after CREATE, after the name,
        after the schema, after AS, after the expression); each matched batch is
        merged into a single Properties node as it is encountered. Falls back to a
        raw Command when no creatable kind can be identified.
        """
        start = self._prev
        # REPLACE either was the previous token or appears as OR REPLACE here.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # Consume only TABLE so FUNCTION becomes the creatable kind below.
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            # RETURN <statement> bodies are wrapped in a Return node.
            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            # Only look for properties here when the body hasn't started yet.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            # CLONE clause, e.g. CREATE TABLE t CLONE src [AT|BEFORE (<kind> => <expr>)].
            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
1234
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """
        Parse a property that may precede the CREATE target (currently only used
        for Teradata).

        Leading modifier keywords are consumed first; only the ones that matched
        are forwarded as keyword arguments to the registered property parser.
        """
        self._match(TokenType.COMMA)

        # NOTE: the dict literal's evaluation order consumes the modifier keywords
        # in the required sequence -- do not reorder these entries.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept one of the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1259
1260    def _parse_property(self) -> t.Optional[exp.Expression]:
1261        if self._match_texts(self.PROPERTY_PARSERS):
1262            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1263
1264        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1265            return self._parse_character_set(default=True)
1266
1267        if self._match_text_seq("COMPOUND", "SORTKEY"):
1268            return self._parse_sortkey(compound=True)
1269
1270        if self._match_text_seq("SQL", "SECURITY"):
1271            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1272
1273        assignment = self._match_pair(
1274            TokenType.VAR, TokenType.EQ, advance=False
1275        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1276
1277        if assignment:
1278            key = self._parse_var_or_string()
1279            self._match(TokenType.EQ)
1280            return self.expression(exp.Property, this=key, value=self._parse_column())
1281
1282        return None
1283
1284    def _parse_stored(self) -> exp.Expression:
1285        self._match(TokenType.ALIAS)
1286
1287        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1288        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1289
1290        return self.expression(
1291            exp.FileFormatProperty,
1292            this=self.expression(
1293                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1294            )
1295            if input_format or output_format
1296            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1297        )
1298
1299    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1300        self._match(TokenType.EQ)
1301        self._match(TokenType.ALIAS)
1302        return self.expression(exp_class, this=self._parse_field())
1303
1304    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1305        properties = []
1306
1307        while True:
1308            if before:
1309                prop = self._parse_property_before()
1310            else:
1311                prop = self._parse_property()
1312
1313            if not prop:
1314                break
1315            for p in ensure_list(prop):
1316                properties.append(p)
1317
1318        if properties:
1319            return self.expression(exp.Properties, expressions=properties)
1320
1321        return None
1322
1323    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1324        return self.expression(
1325            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1326        )
1327
1328    def _parse_volatile_property(self) -> exp.Expression:
1329        if self._index >= 2:
1330            pre_volatile_token = self._tokens[self._index - 2]
1331        else:
1332            pre_volatile_token = None
1333
1334        if pre_volatile_token and pre_volatile_token.token_type in (
1335            TokenType.CREATE,
1336            TokenType.REPLACE,
1337            TokenType.UNIQUE,
1338        ):
1339            return exp.VolatileProperty()
1340
1341        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1342
1343    def _parse_with_property(
1344        self,
1345    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1346        self._match(TokenType.WITH)
1347        if self._match(TokenType.L_PAREN, advance=False):
1348            return self._parse_wrapped_csv(self._parse_property)
1349
1350        if self._match_text_seq("JOURNAL"):
1351            return self._parse_withjournaltable()
1352
1353        if self._match_text_seq("DATA"):
1354            return self._parse_withdata(no=False)
1355        elif self._match_text_seq("NO", "DATA"):
1356            return self._parse_withdata(no=True)
1357
1358        if not self._next:
1359            return None
1360
1361        return self._parse_withisolatedloading()
1362
1363    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1364    def _parse_definer(self) -> t.Optional[exp.Expression]:
1365        self._match(TokenType.EQ)
1366
1367        user = self._parse_id_var()
1368        self._match(TokenType.PARAMETER)
1369        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1370
1371        if not user or not host:
1372            return None
1373
1374        return exp.DefinerProperty(this=f"{user}@{host}")
1375
1376    def _parse_withjournaltable(self) -> exp.Expression:
1377        self._match(TokenType.TABLE)
1378        self._match(TokenType.EQ)
1379        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1380
1381    def _parse_log(self, no: bool = False) -> exp.Expression:
1382        return self.expression(exp.LogProperty, no=no)
1383
1384    def _parse_journal(self, **kwargs) -> exp.Expression:
1385        return self.expression(exp.JournalProperty, **kwargs)
1386
1387    def _parse_checksum(self) -> exp.Expression:
1388        self._match(TokenType.EQ)
1389
1390        on = None
1391        if self._match(TokenType.ON):
1392            on = True
1393        elif self._match_text_seq("OFF"):
1394            on = False
1395        default = self._match(TokenType.DEFAULT)
1396
1397        return self.expression(
1398            exp.ChecksumProperty,
1399            on=on,
1400            default=default,
1401        )
1402
1403    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1404        if not self._match_text_seq("BY"):
1405            self._retreat(self._index - 1)
1406            return None
1407        return self.expression(
1408            exp.Cluster,
1409            expressions=self._parse_csv(self._parse_ordered),
1410        )
1411
1412    def _parse_freespace(self) -> exp.Expression:
1413        self._match(TokenType.EQ)
1414        return self.expression(
1415            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1416        )
1417
1418    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1419        if self._match(TokenType.EQ):
1420            return self.expression(
1421                exp.MergeBlockRatioProperty,
1422                this=self._parse_number(),
1423                percent=self._match(TokenType.PERCENT),
1424            )
1425        return self.expression(
1426            exp.MergeBlockRatioProperty,
1427            no=no,
1428            default=default,
1429        )
1430
1431    def _parse_datablocksize(
1432        self,
1433        default: t.Optional[bool] = None,
1434        minimum: t.Optional[bool] = None,
1435        maximum: t.Optional[bool] = None,
1436    ) -> exp.Expression:
1437        self._match(TokenType.EQ)
1438        size = self._parse_number()
1439        units = None
1440        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1441            units = self._prev.text
1442        return self.expression(
1443            exp.DataBlocksizeProperty,
1444            size=size,
1445            units=units,
1446            default=default,
1447            minimum=minimum,
1448            maximum=maximum,
1449        )
1450
1451    def _parse_blockcompression(self) -> exp.Expression:
1452        self._match(TokenType.EQ)
1453        always = self._match_text_seq("ALWAYS")
1454        manual = self._match_text_seq("MANUAL")
1455        never = self._match_text_seq("NEVER")
1456        default = self._match_text_seq("DEFAULT")
1457        autotemp = None
1458        if self._match_text_seq("AUTOTEMP"):
1459            autotemp = self._parse_schema()
1460
1461        return self.expression(
1462            exp.BlockCompressionProperty,
1463            always=always,
1464            manual=manual,
1465            never=never,
1466            default=default,
1467            autotemp=autotemp,
1468        )
1469
1470    def _parse_withisolatedloading(self) -> exp.Expression:
1471        no = self._match_text_seq("NO")
1472        concurrent = self._match_text_seq("CONCURRENT")
1473        self._match_text_seq("ISOLATED", "LOADING")
1474        for_all = self._match_text_seq("FOR", "ALL")
1475        for_insert = self._match_text_seq("FOR", "INSERT")
1476        for_none = self._match_text_seq("FOR", "NONE")
1477        return self.expression(
1478            exp.IsolatedLoadingProperty,
1479            no=no,
1480            concurrent=concurrent,
1481            for_all=for_all,
1482            for_insert=for_insert,
1483            for_none=for_none,
1484        )
1485
1486    def _parse_locking(self) -> exp.Expression:
1487        if self._match(TokenType.TABLE):
1488            kind = "TABLE"
1489        elif self._match(TokenType.VIEW):
1490            kind = "VIEW"
1491        elif self._match(TokenType.ROW):
1492            kind = "ROW"
1493        elif self._match_text_seq("DATABASE"):
1494            kind = "DATABASE"
1495        else:
1496            kind = None
1497
1498        if kind in ("DATABASE", "TABLE", "VIEW"):
1499            this = self._parse_table_parts()
1500        else:
1501            this = None
1502
1503        if self._match(TokenType.FOR):
1504            for_or_in = "FOR"
1505        elif self._match(TokenType.IN):
1506            for_or_in = "IN"
1507        else:
1508            for_or_in = None
1509
1510        if self._match_text_seq("ACCESS"):
1511            lock_type = "ACCESS"
1512        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1513            lock_type = "EXCLUSIVE"
1514        elif self._match_text_seq("SHARE"):
1515            lock_type = "SHARE"
1516        elif self._match_text_seq("READ"):
1517            lock_type = "READ"
1518        elif self._match_text_seq("WRITE"):
1519            lock_type = "WRITE"
1520        elif self._match_text_seq("CHECKSUM"):
1521            lock_type = "CHECKSUM"
1522        else:
1523            lock_type = None
1524
1525        override = self._match_text_seq("OVERRIDE")
1526
1527        return self.expression(
1528            exp.LockingProperty,
1529            this=this,
1530            kind=kind,
1531            for_or_in=for_or_in,
1532            lock_type=lock_type,
1533            override=override,
1534        )
1535
1536    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1537        if self._match(TokenType.PARTITION_BY):
1538            return self._parse_csv(self._parse_conjunction)
1539        return []
1540
1541    def _parse_partitioned_by(self) -> exp.Expression:
1542        self._match(TokenType.EQ)
1543        return self.expression(
1544            exp.PartitionedByProperty,
1545            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1546        )
1547
1548    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1549        if self._match_text_seq("AND", "STATISTICS"):
1550            statistics = True
1551        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1552            statistics = False
1553        else:
1554            statistics = None
1555
1556        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1557
1558    def _parse_no_property(self) -> t.Optional[exp.Property]:
1559        if self._match_text_seq("PRIMARY", "INDEX"):
1560            return exp.NoPrimaryIndexProperty()
1561        return None
1562
1563    def _parse_on_property(self) -> t.Optional[exp.Property]:
1564        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1565            return exp.OnCommitProperty()
1566        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1567            return exp.OnCommitProperty(delete=True)
1568        return None
1569
1570    def _parse_distkey(self) -> exp.Expression:
1571        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1572
1573    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1574        table = self._parse_table(schema=True)
1575        options = []
1576        while self._match_texts(("INCLUDING", "EXCLUDING")):
1577            this = self._prev.text.upper()
1578            id_var = self._parse_id_var()
1579
1580            if not id_var:
1581                return None
1582
1583            options.append(
1584                self.expression(
1585                    exp.Property,
1586                    this=this,
1587                    value=exp.Var(this=id_var.this.upper()),
1588                )
1589            )
1590        return self.expression(exp.LikeProperty, this=table, expressions=options)
1591
1592    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1593        return self.expression(
1594            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1595        )
1596
1597    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1598        self._match(TokenType.EQ)
1599        return self.expression(
1600            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1601        )
1602
1603    def _parse_returns(self) -> exp.Expression:
1604        value: t.Optional[exp.Expression]
1605        is_table = self._match(TokenType.TABLE)
1606
1607        if is_table:
1608            if self._match(TokenType.LT):
1609                value = self.expression(
1610                    exp.Schema,
1611                    this="TABLE",
1612                    expressions=self._parse_csv(self._parse_struct_types),
1613                )
1614                if not self._match(TokenType.GT):
1615                    self.raise_error("Expecting >")
1616            else:
1617                value = self._parse_schema(exp.Var(this="TABLE"))
1618        else:
1619            value = self._parse_types()
1620
1621        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1622
1623    def _parse_describe(self) -> exp.Expression:
1624        kind = self._match_set(self.CREATABLES) and self._prev.text
1625        this = self._parse_table()
1626
1627        return self.expression(exp.Describe, this=this, kind=kind)
1628
1629    def _parse_insert(self) -> exp.Expression:
1630        overwrite = self._match(TokenType.OVERWRITE)
1631        local = self._match_text_seq("LOCAL")
1632        alternative = None
1633
1634        if self._match_text_seq("DIRECTORY"):
1635            this: t.Optional[exp.Expression] = self.expression(
1636                exp.Directory,
1637                this=self._parse_var_or_string(),
1638                local=local,
1639                row_format=self._parse_row_format(match_row=True),
1640            )
1641        else:
1642            if self._match(TokenType.OR):
1643                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1644
1645            self._match(TokenType.INTO)
1646            self._match(TokenType.TABLE)
1647            this = self._parse_table(schema=True)
1648
1649        return self.expression(
1650            exp.Insert,
1651            this=this,
1652            exists=self._parse_exists(),
1653            partition=self._parse_partition(),
1654            expression=self._parse_ddl_select(),
1655            conflict=self._parse_on_conflict(),
1656            returning=self._parse_returning(),
1657            overwrite=overwrite,
1658            alternative=alternative,
1659        )
1660
1661    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
1662        conflict = self._match_text_seq("ON", "CONFLICT")
1663        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1664
1665        if not (conflict or duplicate):
1666            return None
1667
1668        nothing = None
1669        expressions = None
1670        key = None
1671        constraint = None
1672
1673        if conflict:
1674            if self._match_text_seq("ON", "CONSTRAINT"):
1675                constraint = self._parse_id_var()
1676            else:
1677                key = self._parse_csv(self._parse_value)
1678
1679        self._match_text_seq("DO")
1680        if self._match_text_seq("NOTHING"):
1681            nothing = True
1682        else:
1683            self._match(TokenType.UPDATE)
1684            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1685
1686        return self.expression(
1687            exp.OnConflict,
1688            duplicate=duplicate,
1689            expressions=expressions,
1690            nothing=nothing,
1691            key=key,
1692            constraint=constraint,
1693        )
1694
1695    def _parse_returning(self) -> t.Optional[exp.Expression]:
1696        if not self._match(TokenType.RETURNING):
1697            return None
1698
1699        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1700
1701    def _parse_row(self) -> t.Optional[exp.Expression]:
1702        if not self._match(TokenType.FORMAT):
1703            return None
1704        return self._parse_row_format()
1705
1706    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1707        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1708            return None
1709
1710        if self._match_text_seq("SERDE"):
1711            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1712
1713        self._match_text_seq("DELIMITED")
1714
1715        kwargs = {}
1716
1717        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1718            kwargs["fields"] = self._parse_string()
1719            if self._match_text_seq("ESCAPED", "BY"):
1720                kwargs["escaped"] = self._parse_string()
1721        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1722            kwargs["collection_items"] = self._parse_string()
1723        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1724            kwargs["map_keys"] = self._parse_string()
1725        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1726            kwargs["lines"] = self._parse_string()
1727        if self._match_text_seq("NULL", "DEFINED", "AS"):
1728            kwargs["null"] = self._parse_string()
1729
1730        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1731
1732    def _parse_load(self) -> exp.Expression:
1733        if self._match_text_seq("DATA"):
1734            local = self._match_text_seq("LOCAL")
1735            self._match_text_seq("INPATH")
1736            inpath = self._parse_string()
1737            overwrite = self._match(TokenType.OVERWRITE)
1738            self._match_pair(TokenType.INTO, TokenType.TABLE)
1739
1740            return self.expression(
1741                exp.LoadData,
1742                this=self._parse_table(schema=True),
1743                local=local,
1744                overwrite=overwrite,
1745                inpath=inpath,
1746                partition=self._parse_partition(),
1747                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1748                serde=self._match_text_seq("SERDE") and self._parse_string(),
1749            )
1750        return self._parse_as_command(self._prev)
1751
1752    def _parse_delete(self) -> exp.Expression:
1753        self._match(TokenType.FROM)
1754
1755        return self.expression(
1756            exp.Delete,
1757            this=self._parse_table(),
1758            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1759            where=self._parse_where(),
1760            returning=self._parse_returning(),
1761        )
1762
1763    def _parse_update(self) -> exp.Expression:
1764        return self.expression(
1765            exp.Update,
1766            **{  # type: ignore
1767                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1768                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1769                "from": self._parse_from(modifiers=True),
1770                "where": self._parse_where(),
1771                "returning": self._parse_returning(),
1772            },
1773        )
1774
1775    def _parse_uncache(self) -> exp.Expression:
1776        if not self._match(TokenType.TABLE):
1777            self.raise_error("Expecting TABLE after UNCACHE")
1778
1779        return self.expression(
1780            exp.Uncache,
1781            exists=self._parse_exists(),
1782            this=self._parse_table(schema=True),
1783        )
1784
1785    def _parse_cache(self) -> exp.Expression:
1786        lazy = self._match_text_seq("LAZY")
1787        self._match(TokenType.TABLE)
1788        table = self._parse_table(schema=True)
1789        options = []
1790
1791        if self._match_text_seq("OPTIONS"):
1792            self._match_l_paren()
1793            k = self._parse_string()
1794            self._match(TokenType.EQ)
1795            v = self._parse_string()
1796            options = [k, v]
1797            self._match_r_paren()
1798
1799        self._match(TokenType.ALIAS)
1800        return self.expression(
1801            exp.Cache,
1802            this=table,
1803            lazy=lazy,
1804            options=options,
1805            expression=self._parse_select(nested=True),
1806        )
1807
1808    def _parse_partition(self) -> t.Optional[exp.Expression]:
1809        if not self._match(TokenType.PARTITION):
1810            return None
1811
1812        return self.expression(
1813            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1814        )
1815
1816    def _parse_value(self) -> exp.Expression:
1817        if self._match(TokenType.L_PAREN):
1818            expressions = self._parse_csv(self._parse_conjunction)
1819            self._match_r_paren()
1820            return self.expression(exp.Tuple, expressions=expressions)
1821
1822        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1823        # Source: https://prestodb.io/docs/current/sql/values.html
1824        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1825
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a plain SELECT,
        a parenthesized subquery (when `nested`/`table`), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table expression instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (with any trailing set operations attached),
            or None if nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when raise_error doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1905
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH (common table expression) clause.

        Args:
            skip_with_token: assume the WITH keyword was already consumed.

        Returns:
            An exp.With node, or None if there is no WITH clause.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Keep consuming CTEs while separated by a comma; a stray repeated
            # WITH keyword between CTEs is also tolerated (and consumed).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1925
1926    def _parse_cte(self) -> exp.Expression:
1927        alias = self._parse_table_alias()
1928        if not alias or not alias.this:
1929            self.raise_error("Expected CTE to have alias")
1930
1931        self._match(TokenType.ALIAS)
1932
1933        return self.expression(
1934            exp.CTE,
1935            this=self._parse_wrapped(self._parse_statement),
1936            alias=alias,
1937        )
1938
1939    def _parse_table_alias(
1940        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1941    ) -> t.Optional[exp.Expression]:
1942        any_token = self._match(TokenType.ALIAS)
1943        alias = (
1944            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1945            or self._parse_string_as_identifier()
1946        )
1947
1948        index = self._index
1949        if self._match(TokenType.L_PAREN):
1950            columns = self._parse_csv(self._parse_function_parameter)
1951            self._match_r_paren() if columns else self._retreat(index)
1952        else:
1953            columns = None
1954
1955        if not alias and not columns:
1956            return None
1957
1958        return self.expression(exp.TableAlias, this=alias, columns=columns)
1959
1960    def _parse_subquery(
1961        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1962    ) -> exp.Expression:
1963        return self.expression(
1964            exp.Subquery,
1965            this=this,
1966            pivots=self._parse_pivots(),
1967            alias=self._parse_table_alias() if parse_alias else None,
1968        )
1969
1970    def _parse_query_modifiers(
1971        self, this: t.Optional[exp.Expression]
1972    ) -> t.Optional[exp.Expression]:
1973        if isinstance(this, self.MODIFIABLES):
1974            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1975                expression = parser(self)
1976
1977                if expression:
1978                    this.set(key, expression)
1979        return this
1980
1981    def _parse_hint(self) -> t.Optional[exp.Expression]:
1982        if self._match(TokenType.HINT):
1983            hints = self._parse_csv(self._parse_function)
1984            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1985                self.raise_error("Expected */ after HINT")
1986            return self.expression(exp.Hint, expressions=hints)
1987
1988        return None
1989
1990    def _parse_into(self) -> t.Optional[exp.Expression]:
1991        if not self._match(TokenType.INTO):
1992            return None
1993
1994        temp = self._match(TokenType.TEMPORARY)
1995        unlogged = self._match_text_seq("UNLOGGED")
1996        self._match(TokenType.TABLE)
1997
1998        return self.expression(
1999            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2000        )
2001
2002    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2003        if not self._match(TokenType.FROM):
2004            return None
2005
2006        comments = self._prev_comments
2007        this = self._parse_table()
2008
2009        return self.expression(
2010            exp.From,
2011            comments=comments,
2012            this=self._parse_query_modifiers(this) if modifiers else this,
2013        )
2014
2015    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2016        if not self._match(TokenType.MATCH_RECOGNIZE):
2017            return None
2018
2019        self._match_l_paren()
2020
2021        partition = self._parse_partition_by()
2022        order = self._parse_order()
2023        measures = (
2024            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2025        )
2026
2027        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2028            rows = exp.Var(this="ONE ROW PER MATCH")
2029        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2030            text = "ALL ROWS PER MATCH"
2031            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2032                text += f" SHOW EMPTY MATCHES"
2033            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2034                text += f" OMIT EMPTY MATCHES"
2035            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2036                text += f" WITH UNMATCHED ROWS"
2037            rows = exp.Var(this=text)
2038        else:
2039            rows = None
2040
2041        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2042            text = "AFTER MATCH SKIP"
2043            if self._match_text_seq("PAST", "LAST", "ROW"):
2044                text += f" PAST LAST ROW"
2045            elif self._match_text_seq("TO", "NEXT", "ROW"):
2046                text += f" TO NEXT ROW"
2047            elif self._match_text_seq("TO", "FIRST"):
2048                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2049            elif self._match_text_seq("TO", "LAST"):
2050                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2051            after = exp.Var(this=text)
2052        else:
2053            after = None
2054
2055        if self._match_text_seq("PATTERN"):
2056            self._match_l_paren()
2057
2058            if not self._curr:
2059                self.raise_error("Expecting )", self._curr)
2060
2061            paren = 1
2062            start = self._curr
2063
2064            while self._curr and paren > 0:
2065                if self._curr.token_type == TokenType.L_PAREN:
2066                    paren += 1
2067                if self._curr.token_type == TokenType.R_PAREN:
2068                    paren -= 1
2069                end = self._prev
2070                self._advance()
2071            if paren > 0:
2072                self.raise_error("Expecting )", self._curr)
2073            pattern = exp.Var(this=self._find_sql(start, end))
2074        else:
2075            pattern = None
2076
2077        define = (
2078            self._parse_csv(
2079                lambda: self.expression(
2080                    exp.Alias,
2081                    alias=self._parse_id_var(any_token=True),
2082                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2083                )
2084            )
2085            if self._match_text_seq("DEFINE")
2086            else None
2087        )
2088
2089        self._match_r_paren()
2090
2091        return self.expression(
2092            exp.MatchRecognize,
2093            partition_by=partition,
2094            order=order,
2095            measures=measures,
2096            rows=rows,
2097            after=after,
2098            pattern=pattern,
2099            define=define,
2100            alias=self._parse_table_alias(),
2101        )
2102
2103    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2104        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2105        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2106
2107        if outer_apply or cross_apply:
2108            this = self._parse_select(table=True)
2109            view = None
2110            outer = not cross_apply
2111        elif self._match(TokenType.LATERAL):
2112            this = self._parse_select(table=True)
2113            view = self._match(TokenType.VIEW)
2114            outer = self._match(TokenType.OUTER)
2115        else:
2116            return None
2117
2118        if not this:
2119            this = self._parse_function() or self._parse_id_var(any_token=False)
2120            while self._match(TokenType.DOT):
2121                this = exp.Dot(
2122                    this=this,
2123                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2124                )
2125
2126        table_alias: t.Optional[exp.Expression]
2127
2128        if view:
2129            table = self._parse_id_var(any_token=False)
2130            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2131            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2132        else:
2133            table_alias = self._parse_table_alias()
2134
2135        expression = self.expression(
2136            exp.Lateral,
2137            this=this,
2138            view=view,
2139            outer=outer,
2140            alias=table_alias,
2141        )
2142
2143        return expression
2144
2145    def _parse_join_side_and_kind(
2146        self,
2147    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2148        return (
2149            self._match(TokenType.NATURAL) and self._prev,
2150            self._match_set(self.JOIN_SIDES) and self._prev,
2151            self._match_set(self.JOIN_KINDS) and self._prev,
2152        )
2153
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (comma joins, JOIN variants, CROSS/OUTER APPLY).

        Args:
            skip_join_token: treat the JOIN keyword as already consumed.

        Returns:
            An exp.Join node, or None if no join syntax is present.
        """
        # A bare comma is an implicit cross join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The side/kind tokens weren't part of a join after all -- rewind.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        # advance=False: only peek for APPLY here, since the pair was either
        # consumed by _parse_lateral or is still pending.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join of the applied expression.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2197
2198    def _parse_index(
2199        self,
2200        index: t.Optional[exp.Expression] = None,
2201    ) -> t.Optional[exp.Expression]:
2202        if index:
2203            unique = None
2204            primary = None
2205            amp = None
2206
2207            self._match(TokenType.ON)
2208            self._match(TokenType.TABLE)  # hive
2209            table = self._parse_table_parts(schema=True)
2210        else:
2211            unique = self._match(TokenType.UNIQUE)
2212            primary = self._match_text_seq("PRIMARY")
2213            amp = self._match_text_seq("AMP")
2214            if not self._match(TokenType.INDEX):
2215                return None
2216            index = self._parse_id_var()
2217            table = None
2218
2219        if self._match(TokenType.L_PAREN, advance=False):
2220            columns = self._parse_wrapped_csv(self._parse_ordered)
2221        else:
2222            columns = None
2223
2224        return self.expression(
2225            exp.Index,
2226            this=index,
2227            table=table,
2228            columns=columns,
2229            unique=unique,
2230            primary=primary,
2231            amp=amp,
2232        )
2233
2234    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2235        return (
2236            (not schema and self._parse_function())
2237            or self._parse_id_var(any_token=False)
2238            or self._parse_string_as_identifier()
2239            or self._parse_placeholder()
2240        )
2241
2242    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2243        catalog = None
2244        db = None
2245        table = self._parse_table_part(schema=schema)
2246
2247        while self._match(TokenType.DOT):
2248            if catalog:
2249                # This allows nesting the table in arbitrarily many dot expressions if needed
2250                table = self.expression(
2251                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2252                )
2253            else:
2254                catalog = db
2255                db = table
2256                table = self._parse_table_part(schema=schema)
2257
2258        if not table:
2259            self.raise_error(f"Expected table name but got {self._curr}")
2260
2261        return self.expression(
2262            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2263        )
2264
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: lateral, unnest, VALUES, subquery,
        or a plain (possibly aliased, sampled, hinted, pivoted) table name.

        Args:
            schema: parse the table as a schema definition (column list allowed).
            alias_tokens: token types allowed as alias names.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            # Schema definitions take no alias/sample/hints.
            return self._parse_schema(this=this)

        # Some dialects place the sample before the alias, others after;
        # alias_post_tablesample selects which order to consume them in.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # e.g. T-SQL style WITH (NOLOCK, ...) table hints
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        return this
2316
2317    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2318        if not self._match(TokenType.UNNEST):
2319            return None
2320
2321        expressions = self._parse_wrapped_csv(self._parse_type)
2322        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2323        alias = self._parse_table_alias()
2324
2325        if alias and self.unnest_column_only:
2326            if alias.args.get("columns"):
2327                self.raise_error("Unexpected extra column alias in unnest.")
2328            alias.set("columns", [alias.this])
2329            alias.set("this", None)
2330
2331        offset = None
2332        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2333            self._match(TokenType.ALIAS)
2334            offset = self._parse_id_var() or exp.Identifier(this="offset")
2335
2336        return self.expression(
2337            exp.Unnest,
2338            expressions=expressions,
2339            ordinality=ordinality,
2340            alias=alias,
2341            offset=offset,
2342        )
2343
2344    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2345        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2346        if not is_derived and not self._match(TokenType.VALUES):
2347            return None
2348
2349        expressions = self._parse_csv(self._parse_value)
2350
2351        if is_derived:
2352            self._match_r_paren()
2353
2354        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2355
2356    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2357        if not self._match(TokenType.TABLE_SAMPLE) and not (
2358            as_modifier and self._match_text_seq("USING", "SAMPLE")
2359        ):
2360            return None
2361
2362        bucket_numerator = None
2363        bucket_denominator = None
2364        bucket_field = None
2365        percent = None
2366        rows = None
2367        size = None
2368        seed = None
2369
2370        kind = (
2371            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2372        )
2373        method = self._parse_var(tokens=(TokenType.ROW,))
2374
2375        self._match(TokenType.L_PAREN)
2376
2377        num = self._parse_number()
2378
2379        if self._match_text_seq("BUCKET"):
2380            bucket_numerator = self._parse_number()
2381            self._match_text_seq("OUT", "OF")
2382            bucket_denominator = bucket_denominator = self._parse_number()
2383            self._match(TokenType.ON)
2384            bucket_field = self._parse_field()
2385        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2386            percent = num
2387        elif self._match(TokenType.ROWS):
2388            rows = num
2389        else:
2390            size = num
2391
2392        self._match(TokenType.R_PAREN)
2393
2394        if self._match(TokenType.L_PAREN):
2395            method = self._parse_var()
2396            seed = self._match(TokenType.COMMA) and self._parse_number()
2397            self._match_r_paren()
2398        elif self._match_texts(("SEED", "REPEATABLE")):
2399            seed = self._parse_wrapped(self._parse_number)
2400
2401        return self.expression(
2402            exp.TableSample,
2403            method=method,
2404            bucket_numerator=bucket_numerator,
2405            bucket_denominator=bucket_denominator,
2406            bucket_field=bucket_field,
2407            percent=percent,
2408            rows=rows,
2409            size=size,
2410            seed=seed,
2411            kind=kind,
2412        )
2413
2414    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2415        return list(iter(self._parse_pivot, None))
2416
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT or UNPIVOT clause, including its output column names.

        Returns:
            An exp.Pivot node, or None if the tokens don't form a pivot clause
            (the parser position is rewound in that case).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT without a paren isn't a pivot clause -- rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aliased aggregation calls.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the pivoted output column names by combining each IN
            # value with each aggregation alias; the dialect decides prefix
            # order and whether string values keep their quotes.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2476
2477    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2478        return [agg.alias for agg in aggregations]
2479
2480    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2481        if not skip_where_token and not self._match(TokenType.WHERE):
2482            return None
2483
2484        return self.expression(
2485            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2486        )
2487
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS modifiers; returns None when the clause is absent.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulate each kind of grouping element under its own arg key.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE take no column list (stored as True);
            # the bare ROLLUP(...)/CUBE(...) forms carry a wrapped list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping as long as grouping modifiers keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2524
2525    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2526        if not self._match(TokenType.GROUPING_SETS):
2527            return None
2528
2529        return self._parse_wrapped_csv(self._parse_grouping_set)
2530
2531    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2532        if self._match(TokenType.L_PAREN):
2533            grouping_set = self._parse_csv(self._parse_column)
2534            self._match_r_paren()
2535            return self.expression(exp.Tuple, expressions=grouping_set)
2536
2537        return self._parse_column()
2538
2539    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2540        if not skip_having_token and not self._match(TokenType.HAVING):
2541            return None
2542        return self.expression(exp.Having, this=self._parse_conjunction())
2543
2544    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2545        if not self._match(TokenType.QUALIFY):
2546            return None
2547        return self.expression(exp.Qualify, this=self._parse_conjunction())
2548
2549    def _parse_order(
2550        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2551    ) -> t.Optional[exp.Expression]:
2552        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2553            return this
2554
2555        return self.expression(
2556            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2557        )
2558
2559    def _parse_sort(
2560        self, exp_class: t.Type[exp.Expression], *texts: str
2561    ) -> t.Optional[exp.Expression]:
2562        if not self._match_text_seq(*texts):
2563            return None
2564        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2565
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY element: expression [ASC | DESC] [NULLS FIRST | LAST].

        When no explicit NULLS ordering is given, `nulls_first` is derived from
        the dialect's `null_ordering` setting so the clause can be transpiled
        faithfully.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer the implicit NULLS placement from the dialect's default.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2587
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top=True`) clause, or a FETCH clause.

        Returns `this` unchanged when neither clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # The argument may be parenthesized, e.g. TOP (10).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH { FIRST | NEXT } [count] [PERCENT] { ROW | ROWS } ...
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive terminators.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2626
2627    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2628        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2629            return this
2630
2631        count = self._parse_number()
2632        self._match_set((TokenType.ROW, TokenType.ROWS))
2633        return self.expression(exp.Offset, this=this, expression=count)
2634
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse row-locking clauses: FOR UPDATE, FOR SHARE, or LOCK IN SHARE
        MODE, each optionally followed by OF <tables> and a wait policy
        (NOWAIT / WAIT <n> / SKIP LOCKED). Returns one `exp.Lock` per clause.
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encoding: True -> NOWAIT, False -> SKIP LOCKED,
            # expression -> WAIT <n>, None -> no wait policy given.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2666
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION / EXCEPT / INTERSECT operators following the query `this`.

        Recurses on the right-hand side, so chained set operations nest to the
        right. Returns `this` unchanged when no set operator follows.
        """
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
2686
2687    def _parse_expression(self) -> t.Optional[exp.Expression]:
2688        return self._parse_alias(self._parse_conjunction())
2689
2690    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2691        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2692
2693    def _parse_equality(self) -> t.Optional[exp.Expression]:
2694        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2695
2696    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2697        return self._parse_tokens(self._parse_range, self.COMPARISON)
2698
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (the operators in `self.RANGE_PARSERS`,
        e.g. BETWEEN/IN/LIKE), plus the ISNULL/NOTNULL shorthands and a
        trailing IS clause.
        """
        this = self._parse_bitwise()
        # A NOT here negates a following range operator, e.g. NOT IN / NOT BETWEEN.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL becomes NOT (expr IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2725
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS predicate (the IS token was consumed).

        Handles IS [NOT] DISTINCT FROM and IS [NOT] NULL/TRUE/FALSE; returns
        None (restoring the position to the IS token) when neither form follows.
        """
        # Point back at the IS token so we can rewind if nothing matches.
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM is
            # null-safe inequality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2740
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse the right-hand side of an IN predicate (IN already consumed).

        Supports IN UNNEST(...), IN (<subquery>), IN (<expression list>), and a
        bare unparenthesized field after IN.
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery goes under the `query` arg; anything else is a
            # plain value list under `expressions`.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2758
2759    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2760        low = self._parse_bitwise()
2761        self._match(TokenType.AND)
2762        high = self._parse_bitwise()
2763        return self.expression(exp.Between, this=this, low=low, high=high)
2764
2765    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2766        if not self._match(TokenType.ESCAPE):
2767            return this
2768        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2769
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, e.g. INTERVAL '5' DAY.

        Returns None when the next token is not INTERVAL.
        """
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2790
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise binary operators left-associatively.

        The shifts << and >> are recognized as adjacent LT/GT token pairs.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2813
2814    def _parse_term(self) -> t.Optional[exp.Expression]:
2815        return self._parse_tokens(self._parse_factor, self.TERM)
2816
2817    def _parse_factor(self) -> t.Optional[exp.Expression]:
2818        return self._parse_tokens(self._parse_unary, self.FACTOR)
2819
2820    def _parse_unary(self) -> t.Optional[exp.Expression]:
2821        if self._match_set(self.UNARY_PARSERS):
2822            return self.UNARY_PARSERS[self._prev.token_type](self)
2823        return self._parse_at_time_zone(self._parse_type())
2824
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01', treated
        as a cast), a bare data type, or fall back to a column expression.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # <TYPE> '<literal>' — dispatch to a type-specific literal
                # parser when one is registered, otherwise emit a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was likely an
                # identifier after all; rewind and re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2846
2847    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2848        this = self._parse_type()
2849        if not this:
2850            return None
2851
2852        return self.expression(
2853            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2854        )
2855
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, e.g. INT, VARCHAR(10), ARRAY<INT>, STRUCT<...>, INT[].

        Args:
            check_func: when True and the type had a parenthesized argument
                list, only accept it as a type if a string literal follows
                (a typed literal); otherwise rewind and return None so the
                tokens can be re-parsed as a function call.

        Returns None (restoring the token position) when no type is found.
        """
        index = self._index

        # Optional "SYSUDTLIB." prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized type arguments: struct fields, nested types, or sizes.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # NAME(...) could equally be a function call; remember that.
            maybe_func = True

        # Trailing [] pairs build up nested ARRAY types, e.g. INT[][].
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone "[" (no matching "]") means this wasn't a type at all.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, optionally
        # followed by a bracketed/parenthesized list of literal values.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants onto dedicated types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        # Disambiguate NAME(...) between a type and a function call: it is only
        # a type here if a string literal follows (peek, then restore).
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2969
2970    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2971        this = self._parse_type() or self._parse_id_var()
2972        self._match(TokenType.COLON)
2973        return self._parse_column_def(this)
2974
2975    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2976        if not self._match_text_seq("AT", "TIME", "ZONE"):
2977            return this
2978        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2979
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including trailing
        column operators such as `::` casts, dotted access and brackets.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast; the operand must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Other mapped operators take the next token as a literal key.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Qualify: for a.b.c, the previously-parsed column a.b becomes
                # the db/table qualifiers of the new column c.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3032
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.5`-style number, or a
        parenthesized expression / subquery / tuple. Returns None otherwise.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Leading-dot number literal, e.g. `.5` -> 0.5.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated values form a tuple, not a grouping.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3076
3077    def _parse_field(
3078        self,
3079        any_token: bool = False,
3080        tokens: t.Optional[t.Collection[TokenType]] = None,
3081    ) -> t.Optional[exp.Expression]:
3082        return (
3083            self._parse_primary()
3084            or self._parse_function()
3085            or self._parse_id_var(any_token=any_token, tokens=tokens)
3086        )
3087
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, possibly followed by a window specification.

        Args:
            functions: optional name -> builder overrides; defaults to
                `self.FUNCTIONS`.
            anonymous: when True, bypass registered builders and produce an
                `exp.Anonymous` node.

        Returns None when the upcoming tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following "(", only parenless functions are possible.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build the typed node and validate its args.
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3141
3142    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3143        return self._parse_column_def(self._parse_id_var())
3144
3145    def _parse_user_defined_function(
3146        self, kind: t.Optional[TokenType] = None
3147    ) -> t.Optional[exp.Expression]:
3148        this = self._parse_id_var()
3149
3150        while self._match(TokenType.DOT):
3151            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3152
3153        if not self._match(TokenType.L_PAREN):
3154            return this
3155
3156        expressions = self._parse_csv(self._parse_function_parameter)
3157        self._match_r_paren()
3158        return self.expression(
3159            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3160        )
3161
3162    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3163        literal = self._parse_primary()
3164        if literal:
3165            return self.expression(exp.Introducer, this=token.text, expression=literal)
3166
3167        return self.expression(exp.Identifier, this=token.text)
3168
3169    def _parse_session_parameter(self) -> exp.Expression:
3170        kind = None
3171        this = self._parse_id_var() or self._parse_primary()
3172
3173        if this and self._match(TokenType.DOT):
3174            kind = this.name
3175            this = self._parse_var() or self._parse_primary()
3176
3177        return self.expression(exp.SessionParameter, this=this, kind=kind)
3178
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1`) or fall back to a function
        argument: a DISTINCT list or an expression/subquery, optionally
        followed by ORDER BY and LIMIT modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Treat the LHS of a `name = value` argument as a plain
                    # variable rather than a column reference.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3210
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized list of column defs/constraints into
        an `exp.Schema` attached to `this`; returns `this` unchanged otherwise.
        """
        index = self._index

        # Probe whether a SELECT follows; whether or not it does (or the probe
        # raises), the token position is always restored before continuing.
        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3231
3232    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3233        # column defs are not really columns, they're identifiers
3234        if isinstance(this, exp.Column):
3235            this = this.this
3236        kind = self._parse_types()
3237
3238        if self._match_text_seq("FOR", "ORDINALITY"):
3239            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3240
3241        constraints = []
3242        while True:
3243            constraint = self._parse_column_constraint()
3244            if not constraint:
3245                break
3246            constraints.append(constraint)
3247
3248        if not kind and not constraints:
3249            return this
3250
3251        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3252
3253    def _parse_auto_increment(self) -> exp.Expression:
3254        start = None
3255        increment = None
3256
3257        if self._match(TokenType.L_PAREN, advance=False):
3258            args = self._parse_wrapped_csv(self._parse_bitwise)
3259            start = seq_get(args, 0)
3260            increment = seq_get(args, 1)
3261        elif self._match_text_seq("START"):
3262            start = self._parse_bitwise()
3263            self._match_text_seq("INCREMENT")
3264            increment = self._parse_bitwise()
3265
3266        if start and increment:
3267            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3268
3269        return exp.AutoIncrementColumnConstraint()
3270
3271    def _parse_compress(self) -> exp.Expression:
3272        if self._match(TokenType.L_PAREN, advance=False):
3273            return self.expression(
3274                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3275            )
3276
3277        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3278
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(<options>)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            # BY DEFAULT [ON NULL] — this=False marks the non-ALWAYS variant
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: GENERATED ... AS (<expr>), a computed column
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3313
3314    def _parse_inline(self) -> t.Optional[exp.Expression]:
3315        self._match_text_seq("LENGTH")
3316        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3317
3318    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3319        if self._match_text_seq("NULL"):
3320            return self.expression(exp.NotNullColumnConstraint)
3321        if self._match_text_seq("CASESPECIFIC"):
3322            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3323        return None
3324
3325    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3326        if self._match(TokenType.CONSTRAINT):
3327            this = self._parse_id_var()
3328        else:
3329            this = None
3330
3331        if self._match_texts(self.CONSTRAINT_PARSERS):
3332            return self.expression(
3333                exp.ColumnConstraint,
3334                this=this,
3335                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3336            )
3337
3338        return this
3339
3340    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3341        if not self._match(TokenType.CONSTRAINT):
3342            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3343
3344        this = self._parse_id_var()
3345        expressions = []
3346
3347        while True:
3348            constraint = self._parse_unnamed_constraint() or self._parse_function()
3349            if not constraint:
3350                break
3351            expressions.append(constraint)
3352
3353        return self.expression(exp.Constraint, this=this, expressions=expressions)
3354
3355    def _parse_unnamed_constraint(
3356        self, constraints: t.Optional[t.Collection[str]] = None
3357    ) -> t.Optional[exp.Expression]:
3358        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3359            return None
3360
3361        constraint = self._prev.text.upper()
3362        if constraint not in self.CONSTRAINT_PARSERS:
3363            self.raise_error(f"No parser found for schema constraint {constraint}.")
3364
3365        return self.CONSTRAINT_PARSERS[constraint](self)
3366
3367    def _parse_unique(self) -> exp.Expression:
3368        if not self._match(TokenType.L_PAREN, advance=False):
3369            return self.expression(exp.UniqueColumnConstraint)
3370        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3371
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into a list of SQL text fragments."""
        options = []
        while True:
            if not self._curr:
                # Ran out of tokens
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON (e.g. DELETE/UPDATE) is kept verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: stop consuming options
                break

        return options
3408
3409    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3410        if match and not self._match(TokenType.REFERENCES):
3411            return None
3412
3413        expressions = None
3414        this = self._parse_id_var()
3415
3416        if self._match(TokenType.L_PAREN, advance=False):
3417            expressions = self._parse_wrapped_id_vars()
3418
3419        options = self._parse_key_constraint_options()
3420        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3421
3422    def _parse_foreign_key(self) -> exp.Expression:
3423        expressions = self._parse_wrapped_id_vars()
3424        reference = self._parse_references()
3425        options = {}
3426
3427        while self._match(TokenType.ON):
3428            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3429                self.raise_error("Expected DELETE or UPDATE")
3430
3431            kind = self._prev.text.lower()
3432
3433            if self._match_text_seq("NO", "ACTION"):
3434                action = "NO ACTION"
3435            elif self._match(TokenType.SET):
3436                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3437                action = "SET " + self._prev.text.upper()
3438            else:
3439                self._advance()
3440                action = self._prev.text.upper()
3441
3442            options[kind] = action
3443
3444        return self.expression(
3445            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3446        )
3447
3448    def _parse_primary_key(self) -> exp.Expression:
3449        desc = (
3450            self._match_set((TokenType.ASC, TokenType.DESC))
3451            and self._prev.token_type == TokenType.DESC
3452        )
3453
3454        if not self._match(TokenType.L_PAREN, advance=False):
3455            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3456
3457        expressions = self._parse_wrapped_csv(self._parse_field)
3458        options = self._parse_key_constraint_options()
3459        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3460
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` following `this`: subscripts, arrays or structs."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:2]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indices by the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)
3489
3490    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3491        if self._match(TokenType.COLON):
3492            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3493        return this
3494
3495    def _parse_case(self) -> t.Optional[exp.Expression]:
3496        ifs = []
3497        default = None
3498
3499        expression = self._parse_conjunction()
3500
3501        while self._match(TokenType.WHEN):
3502            this = self._parse_conjunction()
3503            self._match(TokenType.THEN)
3504            then = self._parse_conjunction()
3505            ifs.append(self.expression(exp.If, this=this, true=then))
3506
3507        if self._match(TokenType.ELSE):
3508            default = self._parse_conjunction()
3509
3510        if not self._match(TokenType.END):
3511            self.raise_error("Expected END after CASE", self._prev)
3512
3513        return self._parse_window(
3514            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3515        )
3516
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # -1 so a failed parse rewinds to before the IF token itself
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3538
3539    def _parse_extract(self) -> exp.Expression:
3540        this = self._parse_function() or self._parse_var() or self._parse_type()
3541
3542        if self._match(TokenType.FROM):
3543            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3544
3545        if not self._match(TokenType.COMMA):
3546            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3547
3548        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3549
3550    def _parse_cast(self, strict: bool) -> exp.Expression:
3551        this = self._parse_conjunction()
3552
3553        if not self._match(TokenType.ALIAS):
3554            if self._match(TokenType.COMMA):
3555                return self.expression(
3556                    exp.CastToStrType, this=this, expression=self._parse_string()
3557                )
3558            else:
3559                self.raise_error("Expected AS after CAST")
3560
3561        to = self._parse_types()
3562
3563        if not to:
3564            self.raise_error("Expected TYPE after CAST")
3565        elif to.this == exp.DataType.Type.CHAR:
3566            if self._match(TokenType.CHARACTER_SET):
3567                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3568
3569        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3570
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: undo the R_PAREN match so the caller closes the call
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3599
3600    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3601        to: t.Optional[exp.Expression]
3602        this = self._parse_bitwise()
3603
3604        if self._match(TokenType.USING):
3605            to = self.expression(exp.CharacterSet, this=self._parse_var())
3606        elif self._match(TokenType.COMMA):
3607            to = self._parse_bitwise()
3608        else:
3609            to = None
3610
3611        # Swap the argument order if needed to produce the correct AST
3612        if self.CONVERT_TYPE_FIRST:
3613            this, to = to, this
3614
3615        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3616
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset)
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result); a trailing unpaired element is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never matches via equality: emit an explicit IS NULL check
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may be NULL at runtime, so match
                # either by equality or when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3663
3664    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3665        self._match_text_seq("KEY")
3666        key = self._parse_field()
3667        self._match(TokenType.COLON)
3668        self._match_text_seq("VALUE")
3669        value = self._parse_field()
3670        if not key and not value:
3671            return None
3672        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3673
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) key/value pairs and optional trailing clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Trailing KEYS of {WITH | WITHOUT} UNIQUE KEYS is consumed if present
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3704
3705    def _parse_logarithm(self) -> exp.Expression:
3706        # Default argument order is base, expression
3707        args = self._parse_csv(self._parse_range)
3708
3709        if len(args) > 1:
3710            if not self.LOG_BASE_FIRST:
3711                args.reverse()
3712            return exp.Log.from_arg_list(args)
3713
3714        return self.expression(
3715            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3716        )
3717
3718    def _parse_match_against(self) -> exp.Expression:
3719        expressions = self._parse_csv(self._parse_column)
3720
3721        self._match_text_seq(")", "AGAINST", "(")
3722
3723        this = self._parse_string()
3724
3725        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3726            modifier = "IN NATURAL LANGUAGE MODE"
3727            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3728                modifier = f"{modifier} WITH QUERY EXPANSION"
3729        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3730            modifier = "IN BOOLEAN MODE"
3731        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3732            modifier = "WITH QUERY EXPANSION"
3733        else:
3734            modifier = None
3735
3736        return self.expression(
3737            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3738        )
3739
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(<expr> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One column of the WITH clause: name, type, optional path, AS JSON flag
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            # The WITH clause sits outside OPENJSON's parens, in its own parens
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3760
3761    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3762        args = self._parse_csv(self._parse_bitwise)
3763
3764        if self._match(TokenType.IN):
3765            return self.expression(
3766                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3767            )
3768
3769        if haystack_first:
3770            haystack = seq_get(args, 0)
3771            needle = seq_get(args, 1)
3772        else:
3773            needle = seq_get(args, 0)
3774            haystack = seq_get(args, 1)
3775
3776        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3777
3778        self.validate_expression(this, args)
3779
3780        return this
3781
3782    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3783        args = self._parse_csv(self._parse_table)
3784        return exp.JoinHint(this=func_name.upper(), expressions=args)
3785
3786    def _parse_substring(self) -> exp.Expression:
3787        # Postgres supports the form: substring(string [from int] [for int])
3788        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3789
3790        args = self._parse_csv(self._parse_bitwise)
3791
3792        if self._match(TokenType.FROM):
3793            args.append(self._parse_bitwise())
3794            if self._match(TokenType.FOR):
3795                args.append(self._parse_bitwise())
3796
3797        this = exp.Substring.from_arg_list(args)
3798        self.validate_expression(this, args)
3799
3800        return this
3801
3802    def _parse_trim(self) -> exp.Expression:
3803        # https://www.w3resource.com/sql/character-functions/trim.php
3804        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3805
3806        position = None
3807        collation = None
3808
3809        if self._match_texts(self.TRIM_TYPES):
3810            position = self._prev.text.upper()
3811
3812        expression = self._parse_bitwise()
3813        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3814            this = self._parse_bitwise()
3815        else:
3816            this = expression
3817            expression = None
3818
3819        if self._match(TokenType.COLLATE):
3820            collation = self._parse_bitwise()
3821
3822        return self.expression(
3823            exp.Trim,
3824            this=this,
3825            position=position,
3826            expression=expression,
3827            collation=collation,
3828        )
3829
    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # NOTE: due to `and`-chaining, this returns False (not None) when WINDOW
        # doesn't match — callers treat both as falsy.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3832
3833    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3834        return self._parse_window(self._parse_id_var(), alias=True)
3835
3836    def _parse_respect_or_ignore_nulls(
3837        self, this: t.Optional[exp.Expression]
3838    ) -> t.Optional[exp.Expression]:
3839        if self._match_text_seq("IGNORE", "NULLS"):
3840            return self.expression(exp.IgnoreNulls, this=this)
3841        if self._match_text_seq("RESPECT", "NULLS"):
3842            return self.expression(exp.RespectNulls, this=this)
3843        return this
3844
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call — FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named WINDOW entry."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows: nothing window-related to parse
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> without parens: a reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS/RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3927
3928    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3929        self._match(TokenType.BETWEEN)
3930
3931        return {
3932            "value": (
3933                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3934                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
3935                or self._parse_bitwise()
3936            ),
3937            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
3938        }
3939
3940    def _parse_alias(
3941        self, this: t.Optional[exp.Expression], explicit: bool = False
3942    ) -> t.Optional[exp.Expression]:
3943        any_token = self._match(TokenType.ALIAS)
3944
3945        if explicit and not any_token:
3946            return this
3947
3948        if self._match(TokenType.L_PAREN):
3949            aliases = self.expression(
3950                exp.Aliases,
3951                this=this,
3952                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3953            )
3954            self._match_r_paren(aliases)
3955            return aliases
3956
3957        alias = self._parse_id_var(any_token)
3958
3959        if alias:
3960            return self.expression(exp.Alias, this=this, alias=alias)
3961
3962        return this
3963
3964    def _parse_id_var(
3965        self,
3966        any_token: bool = True,
3967        tokens: t.Optional[t.Collection[TokenType]] = None,
3968        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3969    ) -> t.Optional[exp.Expression]:
3970        identifier = self._parse_identifier()
3971
3972        if identifier:
3973            return identifier
3974
3975        prefix = ""
3976
3977        if prefix_tokens:
3978            while self._match_set(prefix_tokens):
3979                prefix += self._prev.text
3980
3981        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3982            quoted = self._prev.token_type == TokenType.STRING
3983            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3984
3985        return None
3986
3987    def _parse_string(self) -> t.Optional[exp.Expression]:
3988        if self._match(TokenType.STRING):
3989            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3990        return self._parse_placeholder()
3991
3992    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3993        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3994
3995    def _parse_number(self) -> t.Optional[exp.Expression]:
3996        if self._match(TokenType.NUMBER):
3997            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3998        return self._parse_placeholder()
3999
4000    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4001        if self._match(TokenType.IDENTIFIER):
4002            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4003        return self._parse_placeholder()
4004
4005    def _parse_var(
4006        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4007    ) -> t.Optional[exp.Expression]:
4008        if (
4009            (any_token and self._advance_any())
4010            or self._match(TokenType.VAR)
4011            or (self._match_set(tokens) if tokens else False)
4012        ):
4013            return self.expression(exp.Var, this=self._prev.text)
4014        return self._parse_placeholder()
4015
4016    def _advance_any(self) -> t.Optional[Token]:
4017        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4018            self._advance()
4019            return self._prev
4020        return None
4021
    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, or a string literal if no variable is present."""
        return self._parse_var() or self._parse_string()
4024
4025    def _parse_null(self) -> t.Optional[exp.Expression]:
4026        if self._match(TokenType.NULL):
4027            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4028        return None
4029
4030    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4031        if self._match(TokenType.TRUE):
4032            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4033        if self._match(TokenType.FALSE):
4034            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4035        return None
4036
4037    def _parse_star(self) -> t.Optional[exp.Expression]:
4038        if self._match(TokenType.STAR):
4039            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4040        return None
4041
4042    def _parse_parameter(self) -> exp.Expression:
4043        wrapped = self._match(TokenType.L_BRACE)
4044        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4045        self._match(TokenType.R_BRACE)
4046        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4047
4048    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4049        if self._match_set(self.PLACEHOLDER_PARSERS):
4050            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4051            if placeholder:
4052                return placeholder
4053            self._advance(-1)
4054        return None
4055
4056    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4057        if not self._match(TokenType.EXCEPT):
4058            return None
4059        if self._match(TokenType.L_PAREN, advance=False):
4060            return self._parse_wrapped_csv(self._parse_column)
4061        return self._parse_csv(self._parse_column)
4062
4063    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4064        if not self._match(TokenType.REPLACE):
4065            return None
4066        if self._match(TokenType.L_PAREN, advance=False):
4067            return self._parse_wrapped_csv(self._parse_expression)
4068        return self._parse_csv(self._parse_expression)
4069
4070    def _parse_csv(
4071        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4072    ) -> t.List[t.Optional[exp.Expression]]:
4073        parse_result = parse_method()
4074        items = [parse_result] if parse_result is not None else []
4075
4076        while self._match(sep):
4077            self._add_comments(parse_result)
4078            parse_result = parse_method()
4079            if parse_result is not None:
4080                items.append(parse_result)
4081
4082        return items
4083
4084    def _parse_tokens(
4085        self, parse_method: t.Callable, expressions: t.Dict
4086    ) -> t.Optional[exp.Expression]:
4087        this = parse_method()
4088
4089        while self._match_set(expressions):
4090            this = self.expression(
4091                expressions[self._prev.token_type],
4092                this=this,
4093                comments=self._prev_comments,
4094                expression=parse_method(),
4095            )
4096
4097        return this
4098
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4101
4102    def _parse_wrapped_csv(
4103        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4104    ) -> t.List[t.Optional[exp.Expression]]:
4105        return self._parse_wrapped(
4106            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4107        )
4108
4109    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4110        wrapped = self._match(TokenType.L_PAREN)
4111        if not wrapped and not optional:
4112            self.raise_error("Expecting (")
4113        parse_result = parse_method()
4114        if wrapped:
4115            self._match_r_paren()
4116        return parse_result
4117
4118    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4119        return self._parse_select() or self._parse_set_operations(
4120            self._parse_expression() if alias else self._parse_conjunction()
4121        )
4122
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )
4127
    def _parse_transaction(self) -> exp.Expression:
        """Parse a transaction start statement, including its mode list."""
        this = None
        # Optional transaction kind defined by the dialect's TRANSACTION_KIND set.
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
4147
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse a COMMIT or ROLLBACK statement (the keyword was already consumed)."""
        chain = None
        savepoint = None
        # The caller matched the COMMIT/ROLLBACK token, so _prev identifies which.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        # ROLLBACK [TO [SAVEPOINT] <name>]
        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # COMMIT [AND [NO] CHAIN]
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
4166
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ADD [COLUMN] [IF NOT EXISTS] <column def> alter action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4187
4188    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4189        drop = self._match(TokenType.DROP) and self._parse_drop()
4190        if drop and not isinstance(drop, exp.Command):
4191            drop.set("kind", drop.args.get("kind", "COLUMN"))
4192        return drop
4193
4194    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4195    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4196        return self.expression(
4197            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4198        )
4199
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an added constraint: a named CHECK, or a FOREIGN/PRIMARY KEY."""
        this = None
        # The constraint-introducing token was consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            # CONSTRAINT <name> CHECK (<condition>) [ENFORCED]
            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # A named constraint may still be a key; unnamed keys are matched here too.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4223
4224    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4225        index = self._index - 1
4226
4227        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4228            return self._parse_csv(self._parse_add_constraint)
4229
4230        self._retreat(index)
4231        return self._parse_csv(self._parse_add_column)
4232
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse an ALTER [COLUMN] action: drop/set default, or change the type."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <type> [COLLATE <collation>] [USING <expression>]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4250
4251    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4252        index = self._index - 1
4253
4254        partition_exists = self._parse_exists()
4255        if self._match(TokenType.PARTITION, advance=False):
4256            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4257
4258        self._retreat(index)
4259        return self._parse_csv(self._parse_drop_column)
4260
    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse an ALTER TABLE ... RENAME [TO] <table> action."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4264
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement; unsupported forms fall back to Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # Dispatch on the action keyword (ADD, DROP, ALTER, RENAME, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node when every token was consumed;
            # leftovers mean the action parser didn't fully understand the input.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4289
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... statement with WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # WHEN NOT MATCHED BY SOURCE vs BY TARGET (T-SQL style);
            # False means "by target", True means "by source".
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE carries no arguments; keep it as a bare Var.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4355
4356    def _parse_show(self) -> t.Optional[exp.Expression]:
4357        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4358        if parser:
4359            return parser(self)
4360        self._advance()
4361        return self.expression(exp.Show, this=self._prev.text.upper())
4362
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET item of the form <name> = <value> or <name> TO <value>."""
        index = self._index

        # SET {GLOBAL | SESSION} TRANSACTION ... is handled by a dedicated parser.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment; rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4389
4390    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4391        self._match_text_seq("TRANSACTION")
4392        characteristics = self._parse_csv(
4393            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4394        )
4395        return self.expression(
4396            exp.SetItem,
4397            expressions=characteristics,
4398            kind="TRANSACTION",
4399            **{"global": global_},  # type: ignore
4400        )
4401
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring a registered SET parser over assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4405
4406    def _parse_set(self) -> exp.Expression:
4407        index = self._index
4408        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4409
4410        if self._curr:
4411            self._retreat(index)
4412            return self._parse_as_command(self._prev)
4413
4414        return set_
4415
4416    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4417        for option in options:
4418            if self._match_text_seq(*option.split(" ")):
4419                return exp.Var(this=option)
4420        return None
4421
4422    def _parse_as_command(self, start: Token) -> exp.Command:
4423        while self._curr:
4424            self._advance()
4425        text = self._find_sql(start, self._prev)
4426        size = len(start.text)
4427        return exp.Command(this=text[:size], expression=text[size:])
4428
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a parser keyed by a multi-word keyword phrase, using `trie`.

        Returns the matching callable, or None (with the cursor restored)
        when no complete phrase from `parsers` matches the upcoming tokens.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No prefix of any known phrase matches: give up.
                break
            if result == 2:
                # A complete phrase matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4451
4452    def _match(self, token_type, advance=True, expression=None):
4453        if not self._curr:
4454            return None
4455
4456        if self._curr.token_type == token_type:
4457            if advance:
4458                self._advance()
4459            self._add_comments(expression)
4460            return True
4461
4462        return None
4463
4464    def _match_set(self, types, advance=True):
4465        if not self._curr:
4466            return None
4467
4468        if self._curr.token_type in types:
4469            if advance:
4470                self._advance()
4471            return True
4472
4473        return None
4474
4475    def _match_pair(self, token_type_a, token_type_b, advance=True):
4476        if not self._curr or not self._next:
4477            return None
4478
4479        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4480            if advance:
4481                self._advance(2)
4482            return True
4483
4484        return None
4485
4486    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4487        if not self._match(TokenType.L_PAREN, expression=expression):
4488            self.raise_error("Expecting (")
4489
4490    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4491        if not self._match(TokenType.R_PAREN, expression=expression):
4492            self.raise_error("Expecting )")
4493
4494    def _match_texts(self, texts, advance=True):
4495        if self._curr and self._curr.text.upper() in texts:
4496            if advance:
4497                self._advance()
4498            return True
4499        return False
4500
4501    def _match_text_seq(self, *texts, advance=True):
4502        index = self._index
4503        for text in texts:
4504            if self._curr and self._curr.text.upper() == text:
4505                self._advance()
4506            else:
4507                self._retreat(index)
4508                return False
4509
4510        if not advance:
4511            self._retreat(index)
4512
4513        return True
4514
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Recursively rewrite Column/Identifier nodes into Dot/Var equivalents."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.column; a bare one becomes a Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this
4532
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters with plain identifiers."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot so the whole chain is replaced once.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not wrapped in a Dot chain: replace the column itself,
                    # rebinding the root when the column IS the root node.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
781    def __init__(
782        self,
783        error_level: t.Optional[ErrorLevel] = None,
784        error_message_context: int = 100,
785        index_offset: int = 0,
786        unnest_column_only: bool = False,
787        alias_post_tablesample: bool = False,
788        max_errors: int = 3,
789        null_ordering: t.Optional[str] = None,
790    ):
791        self.error_level = error_level or ErrorLevel.IMMEDIATE
792        self.error_message_context = error_message_context
793        self.index_offset = index_offset
794        self.unnest_column_only = unnest_column_only
795        self.alias_post_tablesample = alias_post_tablesample
796        self.max_errors = max_errors
797        self.null_ordering = null_ordering
798        self.reset()
def reset(self):
800    def reset(self):
801        self.sql = ""
802        self.errors = []
803        self._tokens = []
804        self._index = 0
805        self._curr = None
806        self._next = None
807        self._prev = None
808        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
810    def parse(
811        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
812    ) -> t.List[t.Optional[exp.Expression]]:
813        """
814        Parses a list of tokens and returns a list of syntax trees, one tree
815        per parsed SQL statement.
816
817        Args:
818            raw_tokens: the list of tokens.
819            sql: the original SQL string, used to produce helpful debug messages.
820
821        Returns:
822            The list of syntax trees.
823        """
824        return self._parse(
825            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
826        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
828    def parse_into(
829        self,
830        expression_types: exp.IntoType,
831        raw_tokens: t.List[Token],
832        sql: t.Optional[str] = None,
833    ) -> t.List[t.Optional[exp.Expression]]:
834        """
835        Parses a list of tokens into a given Expression type. If a collection of Expression
836        types is given instead, this method will try to parse the token list into each one
837        of them, stopping at the first for which the parsing succeeds.
838
839        Args:
840            expression_types: the expression type(s) to try and parse the token list into.
841            raw_tokens: the list of tokens.
842            sql: the original SQL string, used to produce helpful debug messages.
843
844        Returns:
845            The target Expression.
846        """
847        errors = []
848        for expression_type in ensure_collection(expression_types):
849            parser = self.EXPRESSION_PARSERS.get(expression_type)
850            if not parser:
851                raise TypeError(f"No parser registered for {expression_type}")
852            try:
853                return self._parse(parser, raw_tokens, sql)
854            except ParseError as e:
855                e.errors[0]["into_expression"] = expression_type
856                errors.append(e)
857        raise ParseError(
858            f"Failed to parse into {expression_types}",
859            errors=merge_errors(errors),
860        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
896    def check_errors(self) -> None:
897        """
898        Logs or raises any found errors, depending on the chosen error level setting.
899        """
900        if self.error_level == ErrorLevel.WARN:
901            for error in self.errors:
902                logger.error(str(error))
903        elif self.error_level == ErrorLevel.RAISE and self.errors:
904            raise ParseError(
905                concat_messages(self.errors, self.max_errors),
906                errors=merge_errors(self.errors),
907            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
909    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
910        """
911        Appends an error in the list of recorded errors or raises it, depending on the chosen
912        error level setting.
913        """
914        token = token or self._curr or self._prev or Token.string("")
915        start = token.start
916        end = token.end + 1
917        start_context = self.sql[max(start - self.error_message_context, 0) : start]
918        highlight = self.sql[start:end]
919        end_context = self.sql[end : end + self.error_message_context]
920
921        error = ParseError.new(
922            f"{message}. Line {token.line}, Col: {token.col}.\n"
923            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
924            description=message,
925            line=token.line,
926            col=token.col,
927            start_context=start_context,
928            highlight=highlight,
929            end_context=end_context,
930        )
931
932        if self.error_level == ErrorLevel.IMMEDIATE:
933            raise error
934
935        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
937    def expression(
938        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
939    ) -> E:
940        """
941        Creates a new, validated Expression.
942
943        Args:
944            exp_class: the expression class to instantiate.
945            comments: an optional list of comments to attach to the expression.
946            kwargs: the arguments to set for the expression along with their respective values.
947
948        Returns:
949            The target expression.
950        """
951        instance = exp_class(**kwargs)
952        instance.add_comments(comments) if comments else self._add_comments(instance)
953        self.validate_expression(instance)
954        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
961    def validate_expression(
962        self, expression: exp.Expression, args: t.Optional[t.List] = None
963    ) -> None:
964        """
965        Validates an already instantiated expression, making sure that all its mandatory arguments
966        are set.
967
968        Args:
969            expression: the expression to validate.
970            args: an optional list of items that was used to instantiate the expression, if it's a Func.
971        """
972        if self.error_level == ErrorLevel.IGNORE:
973            return
974
975        for error_message in expression.error_messages(args):
976            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.