sqlglot.parser — module source listing

(“Edit on GitHub” links to the upstream source file.)

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import (
  10    apply_index_offset,
  11    count_params,
  12    ensure_collection,
  13    ensure_list,
  14    seq_get,
  15)
  16from sqlglot.tokens import Token, Tokenizer, TokenType
  17from sqlglot.trie import in_trie, new_trie
  18
# Module-level logger; shared by the whole sqlglot package namespace.
logger = logging.getLogger("sqlglot")
  20
  21
  22def parse_var_map(args):
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34def binary_range_parser(
  35    expr_type: t.Type[exp.Expression],
  36) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  37    return lambda self, this: self._parse_escape(
  38        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  39    )
  40
  41
  42class _Parser(type):
  43    def __new__(cls, clsname, bases, attrs):
  44        klass = super().__new__(cls, clsname, bases, attrs)
  45        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  46        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  47
  48        return klass
  49
  50
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        unnest_column_only: whether aliases on UNNEST apply to the columns only
            (exact semantics are defined by the table/unnest parsing elsewhere in
            this module).
            Default: False
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  73
    # Function name -> builder. Seeded with every function sqlglot registers
    # (via its SQL names), then extended with synthetic helpers used when
    # transpiling between dialects, plus the IFNULL alias.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        # DATE_TO_DATE_STR(x) is modeled as CAST(x AS TEXT).
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT, then keep the first 10 chars (the "YYYY-MM-DD" prefix).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }
  95
    # Functions that may appear without parentheses (e.g. CURRENT_DATE).
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate, not a distinct
    # datetime expression — looks deliberate here, but confirm upstream.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Types that can wrap other types, e.g. ARRAY<INT>, MAP<K, V>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # Every token the parser accepts as a data type name.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Quantifiers that turn a subquery into a predicate (ANY/ALL/EXISTS/SOME).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Objects that live inside a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Everything CREATE/DROP can target.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
 185
    # Tokens that, although keywords, may still be used as identifiers
    # (table/column names) in most contexts.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.IF,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed as table aliases; excludes keywords that would
    # be ambiguous right after a table expression (LEFT, NATURAL, OFFSET, ...).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # UPDATE <table> SET ... : SET cannot alias the target table.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
 303
    # Binary-operator tables, one per precedence tier. Each maps the operator
    # token to the expression class the parser builds for it.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `x -> expr` builds exp.Lambda (rewriting column refs to
    # the lambda's parameters); `x => expr` builds a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that attach to a column: casts and JSON/JSONB access.
    # DOT is handled inline by the column parser, hence the None entry.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        # NOTE(review): PLACEHOLDER here presumably covers Postgres's `?`
        # JSONB-contains operator, which tokenizes as a placeholder — confirm.
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 418
    # Expression class -> parse method; this is the dispatch table consulted by
    # `parse_into` when asked to parse tokens into a specific node type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement keyword -> parse method for a full statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE|WAREHOUSE|DATABASE|SCHEMA] <name>
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 474
    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / atom tokens -> expression builders (each receives the token).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, IS, LIKE-family, ...). The
    # binary ones share `binary_range_parser`, which also consumes ESCAPE.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 524
    # Keyword -> parser for CREATE-statement properties. Entries keyed on a
    # modifier word (AFTER/BEFORE/NO/...) inspect `self._prev` to know which
    # variant was matched.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE volatility level.
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }
 603
    # Keyword -> parser for column constraints inside CREATE TABLE / ALTER.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Keyword -> parser for the action clause of ALTER TABLE.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Table-level constraints that may appear without a preceding column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 653
    # Function-like constructs that take no parentheses (CASE, IF, ANY).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (CAST, TRIM, ...).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # SELECT-statement modifier clauses, keyed by the arg name they populate.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET-statement scopes; keys feed the `_set_trie` built by the metaclass.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty in the base parser; dialects add SHOW-statement handlers here.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Node types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect knobs: whether CAST errors on failure, and CONVERT arg order.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False
 720
    # Fixed attribute set: configuration (first group) plus per-parse cursor
    # state (underscored names, reset by `reset()`). Saves per-instance dicts.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 740
 741    def __init__(
 742        self,
 743        error_level: t.Optional[ErrorLevel] = None,
 744        error_message_context: int = 100,
 745        index_offset: int = 0,
 746        unnest_column_only: bool = False,
 747        alias_post_tablesample: bool = False,
 748        max_errors: int = 3,
 749        null_ordering: t.Optional[str] = None,
 750    ):
 751        self.error_level = error_level or ErrorLevel.IMMEDIATE
 752        self.error_message_context = error_message_context
 753        self.index_offset = index_offset
 754        self.unnest_column_only = unnest_column_only
 755        self.alias_post_tablesample = alias_post_tablesample
 756        self.max_errors = max_errors
 757        self.null_ordering = null_ordering
 758        self.reset()
 759
 760    def reset(self):
 761        self.sql = ""
 762        self.errors = []
 763        self._tokens = []
 764        self._index = 0
 765        self._curr = None
 766        self._next = None
 767        self._prev = None
 768        self._prev_comments = None
 769
 770    def parse(
 771        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 772    ) -> t.List[t.Optional[exp.Expression]]:
 773        """
 774        Parses a list of tokens and returns a list of syntax trees, one tree
 775        per parsed SQL statement.
 776
 777        Args:
 778            raw_tokens: the list of tokens.
 779            sql: the original SQL string, used to produce helpful debug messages.
 780
 781        Returns:
 782            The list of syntax trees.
 783        """
 784        return self._parse(
 785            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 786        )
 787
 788    def parse_into(
 789        self,
 790        expression_types: exp.IntoType,
 791        raw_tokens: t.List[Token],
 792        sql: t.Optional[str] = None,
 793    ) -> t.List[t.Optional[exp.Expression]]:
 794        """
 795        Parses a list of tokens into a given Expression type. If a collection of Expression
 796        types is given instead, this method will try to parse the token list into each one
 797        of them, stopping at the first for which the parsing succeeds.
 798
 799        Args:
 800            expression_types: the expression type(s) to try and parse the token list into.
 801            raw_tokens: the list of tokens.
 802            sql: the original SQL string, used to produce helpful debug messages.
 803
 804        Returns:
 805            The target Expression.
 806        """
 807        errors = []
 808        for expression_type in ensure_collection(expression_types):
 809            parser = self.EXPRESSION_PARSERS.get(expression_type)
 810            if not parser:
 811                raise TypeError(f"No parser registered for {expression_type}")
 812            try:
 813                return self._parse(parser, raw_tokens, sql)
 814            except ParseError as e:
 815                e.errors[0]["into_expression"] = expression_type
 816                errors.append(e)
 817        raise ParseError(
 818            f"Failed to parse into {expression_types}",
 819            errors=merge_errors(errors),
 820        ) from errors[-1]
 821
 822    def _parse(
 823        self,
 824        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 825        raw_tokens: t.List[Token],
 826        sql: t.Optional[str] = None,
 827    ) -> t.List[t.Optional[exp.Expression]]:
 828        self.reset()
 829        self.sql = sql or ""
 830        total = len(raw_tokens)
 831        chunks: t.List[t.List[Token]] = [[]]
 832
 833        for i, token in enumerate(raw_tokens):
 834            if token.token_type == TokenType.SEMICOLON:
 835                if i < total - 1:
 836                    chunks.append([])
 837            else:
 838                chunks[-1].append(token)
 839
 840        expressions = []
 841
 842        for tokens in chunks:
 843            self._index = -1
 844            self._tokens = tokens
 845            self._advance()
 846
 847            expressions.append(parse_method(self))
 848
 849            if self._index < len(self._tokens):
 850                self.raise_error("Invalid expression / Unexpected token")
 851
 852            self.check_errors()
 853
 854        return expressions
 855
 856    def check_errors(self) -> None:
 857        """
 858        Logs or raises any found errors, depending on the chosen error level setting.
 859        """
 860        if self.error_level == ErrorLevel.WARN:
 861            for error in self.errors:
 862                logger.error(str(error))
 863        elif self.error_level == ErrorLevel.RAISE and self.errors:
 864            raise ParseError(
 865                concat_messages(self.errors, self.max_errors),
 866                errors=merge_errors(self.errors),
 867            )
 868
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error to the nearest known token (or an empty one as last resort).
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        # Surrounding context is clamped to error_message_context characters each side.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending SQL in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 896
    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach comments collected from the last consumed token, then clear them so
        # they aren't re-attached to the next expression created.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        # Explicitly passed comments take precedence.
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance
 919
 920    def validate_expression(
 921        self, expression: exp.Expression, args: t.Optional[t.List] = None
 922    ) -> None:
 923        """
 924        Validates an already instantiated expression, making sure that all its mandatory arguments
 925        are set.
 926
 927        Args:
 928            expression: the expression to validate.
 929            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 930        """
 931        if self.error_level == ErrorLevel.IGNORE:
 932            return
 933
 934        for error_message in expression.error_messages(args):
 935            self.raise_error(error_message)
 936
 937    def _find_sql(self, start: Token, end: Token) -> str:
 938        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 939
 940    def _find_token(self, token: Token) -> int:
 941        line = 1
 942        col = 1
 943        index = 0
 944
 945        while line < token.line or col < token.col:
 946            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 947                line += 1
 948                col = 1
 949            else:
 950                col += 1
 951            index += 1
 952
 953        return index
 954
 955    def _advance(self, times: int = 1) -> None:
 956        self._index += times
 957        self._curr = seq_get(self._tokens, self._index)
 958        self._next = seq_get(self._tokens, self._index + 1)
 959        if self._index > 0:
 960            self._prev = self._tokens[self._index - 1]
 961            self._prev_comments = self._prev.comments
 962        else:
 963            self._prev = None
 964            self._prev_comments = None
 965
 966    def _retreat(self, index: int) -> None:
 967        if index != self._index:
 968            self._advance(index - self._index)
 969
 970    def _parse_command(self) -> exp.Expression:
 971        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 972
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <object> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # The comment target kind must be one of the creatable object types.
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown target: fall back to treating the statement as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
 998
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: a registered statement, a raw command, or an expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        # Attach any trailing query modifiers to the expression in place.
        self._parse_query_modifiers(expression)
        return expression
1014
    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parse a DROP statement; unknown kinds fall back to a raw command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        # NOTE: keyword arguments are evaluated left to right, so the parse calls
        # below consume tokens in exactly this order (IF EXISTS, table, CASCADE).
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )
1035
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; truthy only when the whole sequence matched."""
        # Short-circuiting keeps token consumption in lockstep with what matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1042
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parse a CREATE statement into an exp.Create node.

        Handles OR REPLACE / UNIQUE / VOLATILE modifiers, functions and procedures,
        indexes, and database-level creatables (tables, views, ...). Properties can
        appear in several positions; each position below is tagged with its
        exp.Properties.Location and merged into a single properties node.
        """
        start = self._prev
        # "CREATE OR REPLACE": either REPLACE was already consumed or follows as "OR REPLACE".
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        # "CREATE TABLE FUNCTION": skip TABLE so the FUNCTION branch handles it.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                # Neither properties nor a creatable kind: treat as a raw command.
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            # Only look for properties here if the body doesn't start immediately.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            volatile=volatile,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )
1166
1167    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1168        self._match(TokenType.COMMA)
1169
1170        # parsers look to _prev for no/dual/default, so need to consume first
1171        self._match_text_seq("NO")
1172        self._match_text_seq("DUAL")
1173        self._match_text_seq("DEFAULT")
1174
1175        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1176            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1177
1178        return None
1179
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property: registered parsers first, then special-cased forms."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # key = value style assignment; look ahead without consuming tokens.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1203
1204    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1205        self._match(TokenType.EQ)
1206        self._match(TokenType.ALIAS)
1207        return self.expression(
1208            exp_class,
1209            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1210        )
1211
1212    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1213        properties = []
1214
1215        while True:
1216            if before:
1217                identified_property = self._parse_property_before()
1218            else:
1219                identified_property = self._parse_property()
1220
1221            if not identified_property:
1222                break
1223            for p in ensure_collection(identified_property):
1224                properties.append(p)
1225
1226        if properties:
1227            return self.expression(exp.Properties, expressions=properties)
1228
1229        return None
1230
1231    def _parse_fallback(self, no=False) -> exp.Expression:
1232        self._match_text_seq("FALLBACK")
1233        return self.expression(
1234            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1235        )
1236
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property (or parenthesized property list) following WITH."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, prop, ...) yields a list of properties.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
1256
1257    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1258    def _parse_definer(self) -> t.Optional[exp.Expression]:
1259        self._match(TokenType.EQ)
1260
1261        user = self._parse_id_var()
1262        self._match(TokenType.PARAMETER)
1263        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1264
1265        if not user or not host:
1266            return None
1267
1268        return exp.DefinerProperty(this=f"{user}@{host}")
1269
1270    def _parse_withjournaltable(self) -> exp.Expression:
1271        self._match(TokenType.TABLE)
1272        self._match(TokenType.EQ)
1273        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1274
    def _parse_log(self, no=False) -> exp.Expression:
        """Parse a [NO] LOG property ('NO' was consumed by the caller)."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1278
    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Parse [NO|DUAL] [BEFORE] JOURNAL (NO/DUAL consumed by the caller)."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1283
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parse [NOT] [LOCAL] AFTER JOURNAL (leading NO/DUAL/LOCAL handled by the caller)."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1289
    def _parse_checksum(self) -> exp.Expression:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        # Tri-state: True for ON, False for OFF, None when neither appears.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )
1306
    def _parse_freespace(self) -> exp.Expression:
        """Parse FREESPACE = <number> [PERCENT]."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )
1313
    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parse MERGEBLOCKRATIO, either with an explicit value or as a NO/DEFAULT variant."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            # MERGEBLOCKRATIO = <number> [PERCENT]
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            # Bare keyword; NO/DEFAULT were consumed by the caller.
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )
1328
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse DATABLOCKSIZE in its DEFAULT / MIN / MAX / explicit-size variants."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        # Explicit size: DATABLOCKSIZE = <number> [BYTES | KBYTES | KILOBYTES]
        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1347
    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP(...)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
1367
    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1383
    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE."""
        # Object kind being locked.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW (and unknown kinds) take no explicit object name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1433
    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr, ...>; returns an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
1438
    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse the value of a PARTITIONED BY property (a schema or a bracketed field)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
1445
    def _parse_withdata(self, no=False) -> exp.Expression:
        """Parse [AND [NO] STATISTICS] after WITH [NO] DATA (already consumed by the caller)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1455
    def _parse_noprimaryindex(self) -> exp.Expression:
        """Parse NO PRIMARY INDEX ('NO' already consumed by the caller)."""
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()
1459
    def _parse_oncommit(self) -> exp.Expression:
        """Parse ON COMMIT PRESERVE ROWS ('ON' already consumed by the caller)."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()
1463
    def _parse_distkey(self) -> exp.Expression:
        """Parse DISTKEY(<column>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1466
    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse LIKE <table> [INCLUDING | EXCLUDING <option> ...] in a CREATE statement."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                # A dangling INCLUDING/EXCLUDING invalidates the whole clause.
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)
1485
    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse [COMPOUND] SORTKEY(<columns>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )
1490
    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )
1496
    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: either a scalar type or a TABLE / TABLE<...> shape."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (struct-style schema)
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1516
    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Parse a [GLOBAL] TEMPORARY property."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1520
    def _parse_describe(self) -> exp.Expression:
        """Parse DESCRIBE [<kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)
1526
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and INSERT OR <alt>."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR <alternative> (e.g. REPLACE/IGNORE-style variants)
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword arguments are evaluated left to right, so the parse calls
        # below consume tokens in exactly this order.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1557
    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <columns> clause, if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1563
    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse FORMAT ... after ROW ('ROW' already consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
1568
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """
        Parse ROW FORMAT SERDE '<serde>' or ROW FORMAT DELIMITED with its
        optional terminator clauses (fields, collection items, map keys, lines, null).

        Args:
            match_row: when True, require and consume the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each clause is optional and order-sensitive; matched ones consume tokens.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1594
    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ..."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # Keyword arguments are evaluated in order, matching the remaining
        # clause order of the statement -- do not reorder them.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1612
    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement (the DELETE keyword was already consumed)."""
        self._match(TokenType.FROM)  # FROM is optional

        # Keyword arguments are evaluated in order: table, USING list, WHERE,
        # RETURNING -- mirroring the clause order of the statement.
        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )
1623
    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement (the UPDATE keyword was already consumed)."""
        # The dict literal keeps the values evaluated in clause order; "from"
        # is a Python keyword, hence **-expansion instead of keyword arguments.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )
1635
1636    def _parse_uncache(self) -> exp.Expression:
1637        if not self._match(TokenType.TABLE):
1638            self.raise_error("Expecting TABLE after UNCACHE")
1639
1640        return self.expression(
1641            exp.Uncache,
1642            exists=self._parse_exists(),
1643            this=self._parse_table(schema=True),
1644        )
1645
1646    def _parse_cache(self) -> exp.Expression:
1647        lazy = self._match(TokenType.LAZY)
1648        self._match(TokenType.TABLE)
1649        table = self._parse_table(schema=True)
1650        options = []
1651
1652        if self._match(TokenType.OPTIONS):
1653            self._match_l_paren()
1654            k = self._parse_string()
1655            self._match(TokenType.EQ)
1656            v = self._parse_string()
1657            options = [k, v]
1658            self._match_r_paren()
1659
1660        self._match(TokenType.ALIAS)
1661        return self.expression(
1662            exp.Cache,
1663            this=table,
1664            lazy=lazy,
1665            options=options,
1666            expression=self._parse_select(nested=True),
1667        )
1668
1669    def _parse_partition(self) -> t.Optional[exp.Expression]:
1670        if not self._match(TokenType.PARTITION):
1671            return None
1672
1673        return self.expression(
1674            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1675        )
1676
1677    def _parse_value(self) -> exp.Expression:
1678        if self._match(TokenType.L_PAREN):
1679            expressions = self._parse_csv(self._parse_conjunction)
1680            self._match_r_paren()
1681            return self.expression(exp.Tuple, expressions=expressions)
1682
1683        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1684        # Source: https://prestodb.io/docs/current/sql/values.html
1685        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1686
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH ... <statement>, SELECT ..., a parenthesized
        subquery/table (when `nested`/`table` is set), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (possibly wrapped in set operations), or None
            when nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the CTE to the statement that follows it, if it can hold one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1760
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH (common table expression) clause.

        Args:
            skip_with_token: when True, assume WITH was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither a comma nor a WITH follows the CTE. Note the
            # short-circuit: if a comma matched, WITH was not consumed by the
            # condition, so the else-branch consumes an optional trailing WITH
            # after the separator.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1777
1778    def _parse_cte(self) -> exp.Expression:
1779        alias = self._parse_table_alias()
1780        if not alias or not alias.this:
1781            self.raise_error("Expected CTE to have alias")
1782
1783        self._match(TokenType.ALIAS)
1784
1785        return self.expression(
1786            exp.CTE,
1787            this=self._parse_wrapped(self._parse_statement),
1788            alias=alias,
1789        )
1790
1791    def _parse_table_alias(
1792        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1793    ) -> t.Optional[exp.Expression]:
1794        any_token = self._match(TokenType.ALIAS)
1795        alias = (
1796            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1797            or self._parse_string_as_identifier()
1798        )
1799
1800        index = self._index
1801        if self._match(TokenType.L_PAREN):
1802            columns = self._parse_csv(self._parse_function_parameter)
1803            self._match_r_paren() if columns else self._retreat(index)
1804        else:
1805            columns = None
1806
1807        if not alias and not columns:
1808            return None
1809
1810        return self.expression(exp.TableAlias, this=alias, columns=columns)
1811
1812    def _parse_subquery(
1813        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1814    ) -> exp.Expression:
1815        return self.expression(
1816            exp.Subquery,
1817            this=this,
1818            pivots=self._parse_pivots(),
1819            alias=self._parse_table_alias() if parse_alias else None,
1820        )
1821
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, comma-joins and the
        clauses registered in QUERY_MODIFIER_PARSERS) to `this` in place.

        No-op when `this` is not one of the MODIFIABLES expression types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        # Comma-separated table references are not consumed when `this` is a
        # bare table expression.
        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Implicit (comma) join: append the table to the FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1846
1847    def _parse_hint(self) -> t.Optional[exp.Expression]:
1848        if self._match(TokenType.HINT):
1849            hints = self._parse_csv(self._parse_function)
1850            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1851                self.raise_error("Expected */ after HINT")
1852            return self.expression(exp.Hint, expressions=hints)
1853
1854        return None
1855
1856    def _parse_into(self) -> t.Optional[exp.Expression]:
1857        if not self._match(TokenType.INTO):
1858            return None
1859
1860        temp = self._match(TokenType.TEMPORARY)
1861        unlogged = self._match(TokenType.UNLOGGED)
1862        self._match(TokenType.TABLE)
1863
1864        return self.expression(
1865            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1866        )
1867
1868    def _parse_from(self) -> t.Optional[exp.Expression]:
1869        if not self._match(TokenType.FROM):
1870            return None
1871
1872        return self.expression(
1873            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1874        )
1875
1876    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1877        if not self._match(TokenType.MATCH_RECOGNIZE):
1878            return None
1879        self._match_l_paren()
1880
1881        partition = self._parse_partition_by()
1882        order = self._parse_order()
1883        measures = (
1884            self._parse_alias(self._parse_conjunction())
1885            if self._match_text_seq("MEASURES")
1886            else None
1887        )
1888
1889        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1890            rows = exp.Var(this="ONE ROW PER MATCH")
1891        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1892            text = "ALL ROWS PER MATCH"
1893            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1894                text += f" SHOW EMPTY MATCHES"
1895            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1896                text += f" OMIT EMPTY MATCHES"
1897            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1898                text += f" WITH UNMATCHED ROWS"
1899            rows = exp.Var(this=text)
1900        else:
1901            rows = None
1902
1903        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1904            text = "AFTER MATCH SKIP"
1905            if self._match_text_seq("PAST", "LAST", "ROW"):
1906                text += f" PAST LAST ROW"
1907            elif self._match_text_seq("TO", "NEXT", "ROW"):
1908                text += f" TO NEXT ROW"
1909            elif self._match_text_seq("TO", "FIRST"):
1910                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1911            elif self._match_text_seq("TO", "LAST"):
1912                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1913            after = exp.Var(this=text)
1914        else:
1915            after = None
1916
1917        if self._match_text_seq("PATTERN"):
1918            self._match_l_paren()
1919
1920            if not self._curr:
1921                self.raise_error("Expecting )", self._curr)
1922
1923            paren = 1
1924            start = self._curr
1925
1926            while self._curr and paren > 0:
1927                if self._curr.token_type == TokenType.L_PAREN:
1928                    paren += 1
1929                if self._curr.token_type == TokenType.R_PAREN:
1930                    paren -= 1
1931                end = self._prev
1932                self._advance()
1933            if paren > 0:
1934                self.raise_error("Expecting )", self._curr)
1935            pattern = exp.Var(this=self._find_sql(start, end))
1936        else:
1937            pattern = None
1938
1939        define = (
1940            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1941        )
1942        self._match_r_paren()
1943
1944        return self.expression(
1945            exp.MatchRecognize,
1946            partition_by=partition,
1947            order=order,
1948            measures=measures,
1949            rows=rows,
1950            after=after,
1951            pattern=pattern,
1952            define=define,
1953        )
1954
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] ... or an OUTER/CROSS APPLY clause.

        APPLY forms are returned wrapped in a Join expression (side LEFT for
        OUTER APPLY); a plain LATERAL returns the Lateral expression itself.
        Returns None when none of the introducing keywords match.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: a table name, then optional column aliases.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1999
2000    def _parse_join_side_and_kind(
2001        self,
2002    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2003        return (
2004            self._match(TokenType.NATURAL) and self._prev,
2005            self._match_set(self.JOIN_SIDES) and self._prev,
2006            self._match_set(self.JOIN_KINDS) and self._prev,
2007        )
2008
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN: [NATURAL] [side] [kind] JOIN table [ON ... | USING (...)].

        Args:
            skip_join_token: when True, assume JOIN was already consumed.
        """
        natural, side, kind = self._parse_join_side_and_kind()

        # NOTE(review): side/kind tokens consumed above are not rewound when
        # JOIN turns out to be absent -- presumably callers only probe here
        # where that is safe; confirm before relying on it elsewhere.
        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2032
2033    def _parse_index(self) -> exp.Expression:
2034        index = self._parse_id_var()
2035        self._match(TokenType.ON)
2036        self._match(TokenType.TABLE)  # hive
2037
2038        return self.expression(
2039            exp.Index,
2040            this=index,
2041            table=self.expression(exp.Table, this=self._parse_id_var()),
2042            columns=self._parse_expression(),
2043        )
2044
2045    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2046        unique = self._match(TokenType.UNIQUE)
2047        primary = self._match_text_seq("PRIMARY")
2048        amp = self._match_text_seq("AMP")
2049        if not self._match(TokenType.INDEX):
2050            return None
2051        index = self._parse_id_var()
2052        columns = None
2053        if self._match(TokenType.L_PAREN, advance=False):
2054            columns = self._parse_wrapped_csv(self._parse_column)
2055        return self.expression(
2056            exp.Index,
2057            this=index,
2058            columns=columns,
2059            unique=unique,
2060            primary=primary,
2061            amp=amp,
2062        )
2063
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly dot-qualified table name into a Table expression.

        Up to three identifiers are interpreted as catalog.db.table; any
        further dot parts nest the table name inside Dot expressions.
        """
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: what was parsed so far becomes the
                # catalog/db qualifiers; the new identifier is the table name.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2089
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: a lateral/unnest/values/subquery, or a table
        name with optional alias, pivots, hints and TABLESAMPLE.

        Args:
            schema: parse the table reference as a schema (via _parse_schema).
            alias_tokens: token types allowed as an alias; defaults to
                TABLE_ALIAS_TOKENS.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put the alias after TABLESAMPLE; the two branches
        # guarded by alias_post_tablesample cover both orders, and exactly
        # one of them assigns table_sample.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints, e.g. WITH (NOLOCK)
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table expression it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2146
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] x]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        # NOTE(review): if WITH matches but ORDINALITY does not, the WITH token
        # stays consumed -- presumably harmless in practice; confirm.
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In unnest-column-only dialects the alias names the produced
            # column, not the table, so move it over.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2173
2174    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2175        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2176        if not is_derived and not self._match(TokenType.VALUES):
2177            return None
2178
2179        expressions = self._parse_csv(self._parse_value)
2180
2181        if is_derived:
2182            self._match_r_paren()
2183
2184        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2185
2186    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2187        if not self._match(TokenType.TABLE_SAMPLE) and not (
2188            as_modifier and self._match_text_seq("USING", "SAMPLE")
2189        ):
2190            return None
2191
2192        bucket_numerator = None
2193        bucket_denominator = None
2194        bucket_field = None
2195        percent = None
2196        rows = None
2197        size = None
2198        seed = None
2199
2200        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2201        method = self._parse_var(tokens=(TokenType.ROW,))
2202
2203        self._match(TokenType.L_PAREN)
2204
2205        num = self._parse_number()
2206
2207        if self._match(TokenType.BUCKET):
2208            bucket_numerator = self._parse_number()
2209            self._match(TokenType.OUT_OF)
2210            bucket_denominator = bucket_denominator = self._parse_number()
2211            self._match(TokenType.ON)
2212            bucket_field = self._parse_field()
2213        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2214            percent = num
2215        elif self._match(TokenType.ROWS):
2216            rows = num
2217        else:
2218            size = num
2219
2220        self._match(TokenType.R_PAREN)
2221
2222        if self._match(TokenType.L_PAREN):
2223            method = self._parse_var()
2224            seed = self._match(TokenType.COMMA) and self._parse_number()
2225            self._match_r_paren()
2226        elif self._match_texts(("SEED", "REPEATABLE")):
2227            seed = self._parse_wrapped(self._parse_number)
2228
2229        return self.expression(
2230            exp.TableSample,
2231            method=method,
2232            bucket_numerator=bucket_numerator,
2233            bucket_denominator=bucket_denominator,
2234            bucket_field=bucket_field,
2235            percent=percent,
2236            rows=rows,
2237            size=size,
2238            seed=seed,
2239            kind=kind,
2240        )
2241
2242    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2243        return list(iter(self._parse_pivot, None))
2244
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, rewinding and returning None if the
        keyword is not followed by a parenthesized body."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause; restore the cursor.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2285
2286    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2287        if not skip_where_token and not self._match(TokenType.WHERE):
2288            return None
2289
2290        return self.expression(
2291            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2292        )
2293
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP and CUBE.

        Args:
            skip_group_by_token: when True, assume GROUP BY was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep consuming grouping elements until an iteration adds nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP (...) / CUBE (...)
            # store the parsed column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2325
2326    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2327        if not self._match(TokenType.GROUPING_SETS):
2328            return None
2329
2330        return self._parse_wrapped_csv(self._parse_grouping_set)
2331
2332    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2333        if self._match(TokenType.L_PAREN):
2334            grouping_set = self._parse_csv(self._parse_column)
2335            self._match_r_paren()
2336            return self.expression(exp.Tuple, expressions=grouping_set)
2337
2338        return self._parse_column()
2339
2340    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2341        if not skip_having_token and not self._match(TokenType.HAVING):
2342            return None
2343        return self.expression(exp.Having, this=self._parse_conjunction())
2344
2345    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2346        if not self._match(TokenType.QUALIFY):
2347            return None
2348        return self.expression(exp.Qualify, this=self._parse_conjunction())
2349
2350    def _parse_order(
2351        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2352    ) -> t.Optional[exp.Expression]:
2353        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2354            return this
2355
2356        return self.expression(
2357            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2358        )
2359
2360    def _parse_sort(
2361        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2362    ) -> t.Optional[exp.Expression]:
2363        if not self._match(token_type):
2364            return None
2365        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2366
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: expression [ASC|DESC] [NULLS FIRST|LAST].

        When no explicit NULLS ordering is present, nulls_first is inferred
        from the dialect's null_ordering setting and the sort direction.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; just consume it
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # "nulls_are_small" sorts nulls first when ascending; any other
        # setting (except "nulls_are_last") sorts them first when descending.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2388
2389    def _parse_limit(
2390        self, this: t.Optional[exp.Expression] = None, top: bool = False
2391    ) -> t.Optional[exp.Expression]:
2392        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2393            limit_paren = self._match(TokenType.L_PAREN)
2394            limit_exp = self.expression(
2395                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2396            )
2397
2398            if limit_paren:
2399                self._match_r_paren()
2400
2401            return limit_exp
2402
2403        if self._match(TokenType.FETCH):
2404            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2405            direction = self._prev.text if direction else "FIRST"
2406            count = self._parse_number()
2407            self._match_set((TokenType.ROW, TokenType.ROWS))
2408            self._match(TokenType.ONLY)
2409            return self.expression(exp.Fetch, direction=direction, count=count)
2410
2411        return this
2412
2413    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2414        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2415            return this
2416
2417        count = self._parse_number()
2418        self._match_set((TokenType.ROW, TokenType.ROWS))
2419        return self.expression(exp.Offset, this=this, expression=count)
2420
2421    def _parse_lock(self) -> t.Optional[exp.Expression]:
2422        if self._match_text_seq("FOR", "UPDATE"):
2423            return self.expression(exp.Lock, update=True)
2424        if self._match_text_seq("FOR", "SHARE"):
2425            return self.expression(exp.Lock, update=False)
2426
2427        return None
2428
2429    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2430        if not self._match_set(self.SET_OPERATIONS):
2431            return this
2432
2433        token_type = self._prev.token_type
2434
2435        if token_type == TokenType.UNION:
2436            expression = exp.Union
2437        elif token_type == TokenType.EXCEPT:
2438            expression = exp.Except
2439        else:
2440            expression = exp.Intersect
2441
2442        return self.expression(
2443            expression,
2444            this=this,
2445            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2446            expression=self._parse_set_operations(self._parse_select(nested=True)),
2447        )
2448
2449    def _parse_expression(self) -> t.Optional[exp.Expression]:
2450        return self._parse_alias(self._parse_conjunction())
2451
2452    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2453        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2454
2455    def _parse_equality(self) -> t.Optional[exp.Expression]:
2456        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2457
2458    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2459        return self._parse_tokens(self._parse_range, self.COMPARISON)
2460
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), IS, and the
        Postgres ISNULL/NOTNULL shorthands around a bitwise-level operand.
        """
        this = self._parse_bitwise()
        # An optional NOT here negates the range predicate (e.g. NOT BETWEEN).
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the NOT captured above, after the predicate has been built.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2483
2484    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2485        negate = self._match(TokenType.NOT)
2486        if self._match(TokenType.DISTINCT_FROM):
2487            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2488            return self.expression(klass, this=this, expression=self._parse_expression())
2489
2490        this = self.expression(
2491            exp.Is,
2492            this=this,
2493            expression=self._parse_null() or self._parse_boolean(),
2494        )
2495        return self.expression(exp.Not, this=this) if negate else this
2496
2497    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2498        unnest = self._parse_unnest()
2499        if unnest:
2500            this = self.expression(exp.In, this=this, unnest=unnest)
2501        elif self._match(TokenType.L_PAREN):
2502            expressions = self._parse_csv(self._parse_select_or_expression)
2503
2504            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2505                this = self.expression(exp.In, this=this, query=expressions[0])
2506            else:
2507                this = self.expression(exp.In, this=this, expressions=expressions)
2508
2509            self._match_r_paren()
2510        else:
2511            this = self.expression(exp.In, this=this, field=self._parse_field())
2512
2513        return this
2514
2515    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2516        low = self._parse_bitwise()
2517        self._match(TokenType.AND)
2518        high = self._parse_bitwise()
2519        return self.expression(exp.Between, this=this, low=low, high=high)
2520
2521    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2522        if not self._match(TokenType.ESCAPE):
2523            return this
2524        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2525
2526    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2527        this = self._parse_term()
2528
2529        while True:
2530            if self._match_set(self.BITWISE):
2531                this = self.expression(
2532                    self.BITWISE[self._prev.token_type],
2533                    this=this,
2534                    expression=self._parse_term(),
2535                )
2536            elif self._match_pair(TokenType.LT, TokenType.LT):
2537                this = self.expression(
2538                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2539                )
2540            elif self._match_pair(TokenType.GT, TokenType.GT):
2541                this = self.expression(
2542                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2543                )
2544            else:
2545                break
2546
2547        return this
2548
2549    def _parse_term(self) -> t.Optional[exp.Expression]:
2550        return self._parse_tokens(self._parse_factor, self.TERM)
2551
2552    def _parse_factor(self) -> t.Optional[exp.Expression]:
2553        return self._parse_tokens(self._parse_unary, self.FACTOR)
2554
2555    def _parse_unary(self) -> t.Optional[exp.Expression]:
2556        if self._match_set(self.UNARY_PARSERS):
2557            return self.UNARY_PARSERS[self._prev.token_type](self)
2558        return self._parse_at_time_zone(self._parse_type())
2559
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, a cast like `DATE '2020-01-01'`, or fall
        back to a plain column expression (backtracking when the tentative
        type parse turns out not to be a type).
        """
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        # Remember where we are so we can backtrack if the "type" reading fails.
        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' -> CAST('2020-01-01' AS DATE)
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type name followed by a column is not a type usage;
                # rewind and reparse the whole thing as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2577
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested types (ARRAY<...>, STRUCT<...>),
        sized types (VARCHAR(10)), array suffixes (INT[]), timestamp time-zone
        modifiers, and INTERVAL units.

        Args:
            check_func: when True, a type name followed by a string argument is
                treated as a function call rather than a type, and parsing is
                rolled back.

        Returns:
            An exp.DataType (or exp.PseudoType / exp.Interval) node, or None when
            no type starts at the current position (the parser is rewound).
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        # Tracks whether what we've consumed could equally be a function call.
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a parameterized type, rewind entirely.
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # Postgres-style array suffixes: INT[], INT[][], ...
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            # Each extra [] wraps another ARRAY level around the current type.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this was a subscript expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket style nesting: ARRAY<INT>, STRUCT<a INT, b TEXT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE modifiers select the concrete timestamp type.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Consuming a time-zone modifier rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a string literal; `TYPE(...) 'str'` means this was a
            # function call, so rewind and let the caller reparse.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2691
2692    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2693        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2694            return self._parse_types()
2695
2696        this = self._parse_id_var()
2697        self._match(TokenType.COLON)
2698        data_type = self._parse_types()
2699
2700        if not data_type:
2701            return None
2702        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2703
2704    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2705        if not self._match(TokenType.AT_TIME_ZONE):
2706            return this
2707        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2708
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including column
        operators such as `::` casts, JSON-style accessors, dotted paths,
        and bracket subscripts.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # JSON-ish accessors take a literal key on the right.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot access: star, function call, or identifier.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers up one level: what looked like a column is
                # actually a table/db/catalog qualifier for `field`.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2757
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.5`-style number, or a
        parenthesized expression / subquery / tuple.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, per the SQL standard.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # A leading-dot number like `.5`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                # Not a subquery: a parenthesized expression list (tuple or paren).
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                # A subquery may be followed by set operations (UNION etc.).
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve any comments attached to the opening paren.
            if this and comments:
                this.comments = comments

            return this

        return None
2804
2805    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2806        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2807
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position.

        Handles no-paren functions (e.g. CURRENT_DATE), special-cased function
        parsers, subquery predicates (EXISTS/ANY/...), registered functions
        with arg validation, and anonymous (unknown) functions.

        Args:
            functions: optional name -> builder mapping overriding self.FUNCTIONS.

        Returns:
            The parsed function expression (possibly wrapped in a window spec),
            or None when the current tokens do not start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        # No '(' follows: only keyword-style no-paren functions are possible.
        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip the function name and the opening paren in one go.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an opaque Anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A window specification (OVER ...) may follow any function call.
        return self._parse_window(this)
2869
2870    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
2871        return self._parse_column_def(self._parse_id_var())
2872
2873    def _parse_user_defined_function(
2874        self, kind: t.Optional[TokenType] = None
2875    ) -> t.Optional[exp.Expression]:
2876        this = self._parse_id_var()
2877
2878        while self._match(TokenType.DOT):
2879            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2880
2881        if not self._match(TokenType.L_PAREN):
2882            return this
2883
2884        expressions = self._parse_csv(self._parse_function_parameter)
2885        self._match_r_paren()
2886        return self.expression(
2887            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2888        )
2889
2890    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2891        literal = self._parse_primary()
2892        if literal:
2893            return self.expression(exp.Introducer, this=token.text, expression=literal)
2894
2895        return self.expression(exp.Identifier, this=token.text)
2896
2897    def _parse_national(self, token: Token) -> exp.Expression:
2898        return self.expression(exp.National, this=exp.Literal.string(token.text))
2899
2900    def _parse_session_parameter(self) -> exp.Expression:
2901        kind = None
2902        this = self._parse_id_var() or self._parse_primary()
2903
2904        if this and self._match(TokenType.DOT):
2905            kind = this.name
2906            this = self._parse_var() or self._parse_primary()
2907
2908        return self.expression(exp.SessionParameter, this=this, kind=kind)
2909
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a function argument: a lambda (`x -> ...` or `(x, y) -> ...`),
        a DISTINCT list, or an ordinary select/expression, with optional
        IGNORE/RESPECT NULLS and ORDER BY/LIMIT modifiers.
        """
        index = self._index

        # Tentatively read a lambda parameter list; rewind if it isn't one.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow followed: undo the speculative parse entirely.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default behavior, so the token is just consumed.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2941
2942    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2943        index = self._index
2944        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2945            self._retreat(index)
2946            return this
2947
2948        args = self._parse_csv(
2949            lambda: self._parse_constraint()
2950            or self._parse_column_def(self._parse_field(any_token=True))
2951        )
2952        self._match_r_paren()
2953        return self.expression(exp.Schema, this=this, expressions=args)
2954
2955    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2956        kind = self._parse_types()
2957
2958        if self._match_text_seq("FOR", "ORDINALITY"):
2959            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2960
2961        constraints = []
2962        while True:
2963            constraint = self._parse_column_constraint()
2964            if not constraint:
2965                break
2966            constraints.append(constraint)
2967
2968        if not kind and not constraints:
2969            return this
2970
2971        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2972
2973    def _parse_auto_increment(self) -> exp.Expression:
2974        start = None
2975        increment = None
2976
2977        if self._match(TokenType.L_PAREN, advance=False):
2978            args = self._parse_wrapped_csv(self._parse_bitwise)
2979            start = seq_get(args, 0)
2980            increment = seq_get(args, 1)
2981        elif self._match_text_seq("START"):
2982            start = self._parse_bitwise()
2983            self._match_text_seq("INCREMENT")
2984            increment = self._parse_bitwise()
2985
2986        if start and increment:
2987            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2988
2989        return exp.AutoIncrementColumnConstraint()
2990
2991    def _parse_compress(self) -> exp.Expression:
2992        if self._match(TokenType.L_PAREN, advance=False):
2993            return self.expression(
2994                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2995            )
2996
2997        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2998
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its optional
        sequence options (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE).
        """
        # `this` is True for ALWAYS, False for BY DEFAULT.
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Each sequence option is independent and optional.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3025
3026    def _parse_inline(self) -> t.Optional[exp.Expression]:
3027        self._match_text_seq("LENGTH")
3028        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3029
3030    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3031        if self._match_text_seq("NULL"):
3032            return self.expression(exp.NotNullColumnConstraint)
3033        if self._match_text_seq("CASESPECIFIC"):
3034            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3035        return None
3036
3037    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3038        this = self._parse_references()
3039        if this:
3040            return this
3041
3042        if self._match(TokenType.CONSTRAINT):
3043            this = self._parse_id_var()
3044
3045        if self._match_texts(self.CONSTRAINT_PARSERS):
3046            return self.expression(
3047                exp.ColumnConstraint,
3048                this=this,
3049                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3050            )
3051
3052        return this
3053
3054    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3055        if not self._match(TokenType.CONSTRAINT):
3056            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3057
3058        this = self._parse_id_var()
3059        expressions = []
3060
3061        while True:
3062            constraint = self._parse_unnamed_constraint() or self._parse_function()
3063            if not constraint:
3064                break
3065            expressions.append(constraint)
3066
3067        return self.expression(exp.Constraint, this=this, expressions=expressions)
3068
3069    def _parse_unnamed_constraint(
3070        self, constraints: t.Optional[t.Collection[str]] = None
3071    ) -> t.Optional[exp.Expression]:
3072        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3073            return None
3074
3075        constraint = self._prev.text.upper()
3076        if constraint not in self.CONSTRAINT_PARSERS:
3077            self.raise_error(f"No parser found for schema constraint {constraint}.")
3078
3079        return self.CONSTRAINT_PARSERS[constraint](self)
3080
3081    def _parse_unique(self) -> exp.Expression:
3082        if not self._match(TokenType.L_PAREN, advance=False):
3083            return self.expression(exp.UniqueColumnConstraint)
3084        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3085
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options as raw strings.

        Consumes ON <event> <action>, NOT ENFORCED, DEFERRABLE, INITIALLY
        DEFERRED, NORELY, and MATCH FULL, in any order, until no more match.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event keyword (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option at the current position: stop.
                break

        return options
3122
3123    def _parse_references(self) -> t.Optional[exp.Expression]:
3124        if not self._match(TokenType.REFERENCES):
3125            return None
3126
3127        expressions = None
3128        this = self._parse_id_var()
3129
3130        if self._match(TokenType.L_PAREN, advance=False):
3131            expressions = self._parse_wrapped_id_vars()
3132
3133        options = self._parse_key_constraint_options()
3134        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3135
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause, and
        any ON DELETE / ON UPDATE actions.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" to the textual action (e.g. "CASCADE").
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE, RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3161
3162    def _parse_primary_key(self) -> exp.Expression:
3163        desc = (
3164            self._match_set((TokenType.ASC, TokenType.DESC))
3165            and self._prev.token_type == TokenType.DESC
3166        )
3167
3168        if not self._match(TokenType.L_PAREN, advance=False):
3169            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3170
3171        expressions = self._parse_wrapped_id_vars()
3172        options = self._parse_key_constraint_options()
3173        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3174
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing ``[...]`` (subscript / array) or ``{...}`` (struct)
        suffix on `this`, recursing so chained subscripts like ``x[0][1]`` work."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no start bound, e.g. x[:y].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # Bare brackets, or brackets after an ARRAY keyword, are an array literal.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift indices by the dialect's index offset to normalize.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)
3203
3204    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3205        if self._match(TokenType.COLON):
3206            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3207        return this
3208
3209    def _parse_case(self) -> t.Optional[exp.Expression]:
3210        ifs = []
3211        default = None
3212
3213        expression = self._parse_conjunction()
3214
3215        while self._match(TokenType.WHEN):
3216            this = self._parse_conjunction()
3217            self._match(TokenType.THEN)
3218            then = self._parse_conjunction()
3219            ifs.append(self.expression(exp.If, this=this, true=then))
3220
3221        if self._match(TokenType.ELSE):
3222            default = self._parse_conjunction()
3223
3224        if not self._match(TokenType.END):
3225            self.raise_error("Expected END after CASE", self._prev)
3226
3227        return self._parse_window(
3228            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3229        )
3230
3231    def _parse_if(self) -> t.Optional[exp.Expression]:
3232        if self._match(TokenType.L_PAREN):
3233            args = self._parse_csv(self._parse_conjunction)
3234            this = exp.If.from_arg_list(args)
3235            self.validate_expression(this, args)
3236            self._match_r_paren()
3237        else:
3238            condition = self._parse_conjunction()
3239            self._match(TokenType.THEN)
3240            true = self._parse_conjunction()
3241            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3242            self._match(TokenType.END)
3243            this = self.expression(exp.If, this=condition, true=true, false=false)
3244
3245        return self._parse_window(this)
3246
3247    def _parse_extract(self) -> exp.Expression:
3248        this = self._parse_function() or self._parse_var() or self._parse_type()
3249
3250        if self._match(TokenType.FROM):
3251            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3252
3253        if not self._match(TokenType.COMMA):
3254            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3255
3256        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3257
3258    def _parse_cast(self, strict: bool) -> exp.Expression:
3259        this = self._parse_conjunction()
3260
3261        if not self._match(TokenType.ALIAS):
3262            self.raise_error("Expected AS after CAST")
3263
3264        to = self._parse_types()
3265
3266        if not to:
3267            self.raise_error("Expected TYPE after CAST")
3268        elif to.this == exp.DataType.Type.CHAR:
3269            if self._match(TokenType.CHARACTER_SET):
3270                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3271
3272        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3273
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style arguments into a GroupConcat node.

        Normalizes both the Postgres trailing-ORDER BY form and the
        WITHIN GROUP (ORDER BY ...) form into the same GroupConcat shape,
        which eases transpilation to MySQL / SQLite GROUP_CONCAT.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3302
3303    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3304        to: t.Optional[exp.Expression]
3305        this = self._parse_bitwise()
3306
3307        if self._match(TokenType.USING):
3308            to = self.expression(exp.CharacterSet, this=self._parse_var())
3309        elif self._match(TokenType.COMMA):
3310            to = self._parse_bitwise()
3311        else:
3312            to = None
3313
3314        # Swap the argument order if needed to produce the correct AST
3315        if self.CONVERT_TYPE_FIRST:
3316            this, to = to, this
3317
3318        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3319
3320    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3321        args = self._parse_csv(self._parse_bitwise)
3322
3323        if self._match(TokenType.IN):
3324            return self.expression(
3325                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3326            )
3327
3328        if haystack_first:
3329            haystack = seq_get(args, 0)
3330            needle = seq_get(args, 1)
3331        else:
3332            needle = seq_get(args, 0)
3333            haystack = seq_get(args, 1)
3334
3335        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3336
3337        self.validate_expression(this, args)
3338
3339        return this
3340
3341    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3342        args = self._parse_csv(self._parse_table)
3343        return exp.JoinHint(this=func_name.upper(), expressions=args)
3344
3345    def _parse_substring(self) -> exp.Expression:
3346        # Postgres supports the form: substring(string [from int] [for int])
3347        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3348
3349        args = self._parse_csv(self._parse_bitwise)
3350
3351        if self._match(TokenType.FROM):
3352            args.append(self._parse_bitwise())
3353            if self._match(TokenType.FOR):
3354                args.append(self._parse_bitwise())
3355
3356        this = exp.Substring.from_arg_list(args)
3357        self.validate_expression(this, args)
3358
3359        return this
3360
3361    def _parse_trim(self) -> exp.Expression:
3362        # https://www.w3resource.com/sql/character-functions/trim.php
3363        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3364
3365        position = None
3366        collation = None
3367
3368        if self._match_set(self.TRIM_TYPES):
3369            position = self._prev.text.upper()
3370
3371        expression = self._parse_term()
3372        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3373            this = self._parse_term()
3374        else:
3375            this = expression
3376            expression = None
3377
3378        if self._match(TokenType.COLLATE):
3379            collation = self._parse_term()
3380
3381        return self.expression(
3382            exp.Trim,
3383            this=this,
3384            position=position,
3385            expression=expression,
3386            collation=collation,
3387        )
3388
3389    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3390        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3391
3392    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3393        return self._parse_window(self._parse_id_var(), alias=True)
3394
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of an expression: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (...).

        When `alias` is True, parse a named window definition
        (``name AS (spec)``) instead of requiring the OVER keyword.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        # OVER <name> (no parentheses): reference to a named window.
        if not self._match(TokenType.L_PAREN):
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # An optional frame: ROWS/RANGE [BETWEEN] <bound> [AND <bound>].
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3468
3469    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3470        self._match(TokenType.BETWEEN)
3471
3472        return {
3473            "value": (
3474                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3475            )
3476            or self._parse_bitwise(),
3477            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3478        }
3479
3480    def _parse_alias(
3481        self, this: t.Optional[exp.Expression], explicit: bool = False
3482    ) -> t.Optional[exp.Expression]:
3483        any_token = self._match(TokenType.ALIAS)
3484
3485        if explicit and not any_token:
3486            return this
3487
3488        if self._match(TokenType.L_PAREN):
3489            aliases = self.expression(
3490                exp.Aliases,
3491                this=this,
3492                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3493            )
3494            self._match_r_paren(aliases)
3495            return aliases
3496
3497        alias = self._parse_id_var(any_token)
3498
3499        if alias:
3500            return self.expression(exp.Alias, this=this, alias=alias)
3501
3502        return this
3503
3504    def _parse_id_var(
3505        self,
3506        any_token: bool = True,
3507        tokens: t.Optional[t.Collection[TokenType]] = None,
3508        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3509    ) -> t.Optional[exp.Expression]:
3510        identifier = self._parse_identifier()
3511
3512        if identifier:
3513            return identifier
3514
3515        prefix = ""
3516
3517        if prefix_tokens:
3518            while self._match_set(prefix_tokens):
3519                prefix += self._prev.text
3520
3521        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3522            quoted = self._prev.token_type == TokenType.STRING
3523            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3524
3525        return None
3526
3527    def _parse_string(self) -> t.Optional[exp.Expression]:
3528        if self._match(TokenType.STRING):
3529            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3530        return self._parse_placeholder()
3531
3532    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3533        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3534
3535    def _parse_number(self) -> t.Optional[exp.Expression]:
3536        if self._match(TokenType.NUMBER):
3537            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3538        return self._parse_placeholder()
3539
3540    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3541        if self._match(TokenType.IDENTIFIER):
3542            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3543        return self._parse_placeholder()
3544
3545    def _parse_var(
3546        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3547    ) -> t.Optional[exp.Expression]:
3548        if (
3549            (any_token and self._advance_any())
3550            or self._match(TokenType.VAR)
3551            or (self._match_set(tokens) if tokens else False)
3552        ):
3553            return self.expression(exp.Var, this=self._prev.text)
3554        return self._parse_placeholder()
3555
3556    def _advance_any(self) -> t.Optional[Token]:
3557        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3558            self._advance()
3559            return self._prev
3560        return None
3561
3562    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3563        return self._parse_var() or self._parse_string()
3564
3565    def _parse_null(self) -> t.Optional[exp.Expression]:
3566        if self._match(TokenType.NULL):
3567            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3568        return None
3569
3570    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3571        if self._match(TokenType.TRUE):
3572            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3573        if self._match(TokenType.FALSE):
3574            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3575        return None
3576
3577    def _parse_star(self) -> t.Optional[exp.Expression]:
3578        if self._match(TokenType.STAR):
3579            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3580        return None
3581
3582    def _parse_parameter(self) -> exp.Expression:
3583        wrapped = self._match(TokenType.L_BRACE)
3584        this = self._parse_var() or self._parse_primary()
3585        self._match(TokenType.R_BRACE)
3586        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3587
3588    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3589        if self._match_set(self.PLACEHOLDER_PARSERS):
3590            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3591            if placeholder:
3592                return placeholder
3593            self._advance(-1)
3594        return None
3595
3596    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3597        if not self._match(TokenType.EXCEPT):
3598            return None
3599        if self._match(TokenType.L_PAREN, advance=False):
3600            return self._parse_wrapped_csv(self._parse_column)
3601        return self._parse_csv(self._parse_column)
3602
3603    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3604        if not self._match(TokenType.REPLACE):
3605            return None
3606        if self._match(TokenType.L_PAREN, advance=False):
3607            return self._parse_wrapped_csv(self._parse_expression)
3608        return self._parse_csv(self._parse_expression)
3609
3610    def _parse_csv(
3611        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3612    ) -> t.List[t.Optional[exp.Expression]]:
3613        parse_result = parse_method()
3614        items = [parse_result] if parse_result is not None else []
3615
3616        while self._match(sep):
3617            if parse_result and self._prev_comments:
3618                parse_result.comments = self._prev_comments
3619
3620            parse_result = parse_method()
3621            if parse_result is not None:
3622                items.append(parse_result)
3623
3624        return items
3625
3626    def _parse_tokens(
3627        self, parse_method: t.Callable, expressions: t.Dict
3628    ) -> t.Optional[exp.Expression]:
3629        this = parse_method()
3630
3631        while self._match_set(expressions):
3632            this = self.expression(
3633                expressions[self._prev.token_type],
3634                this=this,
3635                comments=self._prev_comments,
3636                expression=parse_method(),
3637            )
3638
3639        return this
3640
    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var)
3643
3644    def _parse_wrapped_csv(
3645        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3646    ) -> t.List[t.Optional[exp.Expression]]:
3647        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3648
3649    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3650        self._match_l_paren()
3651        parse_result = parse_method()
3652        self._match_r_paren()
3653        return parse_result
3654
    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full SELECT if present, otherwise a plain expression."""
        return self._parse_select() or self._parse_expression()
3657
3658    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3659        return self._parse_set_operations(
3660            self._parse_select(nested=True, parse_subquery_alias=False)
3661        )
3662
3663    def _parse_transaction(self) -> exp.Expression:
3664        this = None
3665        if self._match_texts(self.TRANSACTION_KIND):
3666            this = self._prev.text
3667
3668        self._match_texts({"TRANSACTION", "WORK"})
3669
3670        modes = []
3671        while True:
3672            mode = []
3673            while self._match(TokenType.VAR):
3674                mode.append(self._prev.text)
3675
3676            if mode:
3677                modes.append(" ".join(mode))
3678            if not self._match(TokenType.COMMA):
3679                break
3680
3681        return self.expression(exp.Transaction, this=this, modes=modes)
3682
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse the tail of COMMIT or ROLLBACK (the verb was already consumed).

        Accepts optional TRANSACTION/WORK noise words, ROLLBACK TO
        [SAVEPOINT] <name>, and AND [NO] CHAIN.
        """
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself is the previous token.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is only attached to Commit and `savepoint`
        # only to Rollback — confirm this asymmetry is intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
3701
3702    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3703        if not self._match_text_seq("ADD"):
3704            return None
3705
3706        self._match(TokenType.COLUMN)
3707        exists_column = self._parse_exists(not_=True)
3708        expression = self._parse_column_def(self._parse_field(any_token=True))
3709
3710        if expression:
3711            expression.set("exists", exists_column)
3712
3713        return expression
3714
3715    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3716        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3717
3718    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3719    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3720        return self.expression(
3721            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3722        )
3723
3724    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3725        this = None
3726        kind = self._prev.token_type
3727
3728        if kind == TokenType.CONSTRAINT:
3729            this = self._parse_id_var()
3730
3731            if self._match_text_seq("CHECK"):
3732                expression = self._parse_wrapped(self._parse_conjunction)
3733                enforced = self._match_text_seq("ENFORCED")
3734
3735                return self.expression(
3736                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3737                )
3738
3739        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3740            expression = self._parse_foreign_key()
3741        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3742            expression = self._parse_primary_key()
3743
3744        return self.expression(exp.AddConstraint, this=this, expression=expression)
3745
3746    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
3747        index = self._index - 1
3748
3749        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
3750            return self._parse_csv(self._parse_add_constraint)
3751
3752        self._retreat(index)
3753        return self._parse_csv(self._parse_add_column)
3754
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action into an AlterColumn."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Otherwise expect a type change: [SET DATA] TYPE <type> [COLLATE c] [USING expr].
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
3772
3773    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
3774        index = self._index - 1
3775
3776        partition_exists = self._parse_exists()
3777        if self._match(TokenType.PARTITION, advance=False):
3778            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
3779
3780        self._retreat(index)
3781        return self._parse_csv(self._parse_drop_column)
3782
3783    def _parse_alter_table_rename(self) -> exp.Expression:
3784        self._match_text_seq("TO")
3785        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
3786
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; any other ALTER statement becomes a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # Dispatch on the action keyword via ALTER_PARSERS (e.g. ADD, DROP, RENAME).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        # Unknown action: fall back to an opaque Command over the whole statement.
        return self._parse_as_command(start)
3808
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: target table, USING source, ON condition,
        and the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN <action> clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True only for BY SOURCE; BY TARGET (or neither) is False.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE has no arguments; keep the keyword itself as a Var.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
3874
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers; unknown
        targets become a generic Show node named after the next token."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())
3881
3882    def _parse_set_item_assignment(
3883        self, kind: t.Optional[str] = None
3884    ) -> t.Optional[exp.Expression]:
3885        index = self._index
3886
3887        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
3888            return self._parse_set_transaction(global_=kind == "GLOBAL")
3889
3890        left = self._parse_primary() or self._parse_id_var()
3891
3892        if not self._match_texts(("=", "TO")):
3893            self._retreat(index)
3894            return None
3895
3896        right = self._parse_statement() or self._parse_id_var()
3897        this = self.expression(
3898            exp.EQ,
3899            this=left,
3900            expression=right,
3901        )
3902
3903        return self.expression(
3904            exp.SetItem,
3905            this=this,
3906            kind=kind,
3907        )
3908
3909    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
3910        self._match_text_seq("TRANSACTION")
3911        characteristics = self._parse_csv(
3912            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
3913        )
3914        return self.expression(
3915            exp.SetItem,
3916            expressions=characteristics,
3917            kind="TRANSACTION",
3918            **{"global": global_},  # type: ignore
3919        )
3920
3921    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3922        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3923        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
3924
3925    def _parse_set(self) -> exp.Expression:
3926        index = self._index
3927        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3928
3929        if self._curr:
3930            self._retreat(index)
3931            return self._parse_as_command(self._prev)
3932
3933        return set_
3934
3935    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
3936        for option in options:
3937            if self._match_text_seq(*option.split(" ")):
3938                return exp.Var(this=option)
3939        return None
3940
3941    def _parse_as_command(self, start: Token) -> exp.Command:
3942        while self._curr:
3943            self._advance()
3944        text = self._find_sql(start, self._prev)
3945        size = len(start.text)
3946        return exp.Command(this=text[:size], expression=text[size:])
3947
3948    def _find_parser(
3949        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3950    ) -> t.Optional[t.Callable]:
3951        if not self._curr:
3952            return None
3953
3954        index = self._index
3955        this = []
3956        while True:
3957            # The current token might be multiple words
3958            curr = self._curr.text.upper()
3959            key = curr.split(" ")
3960            this.append(curr)
3961            self._advance()
3962            result, trie = in_trie(trie, key)
3963            if result == 0:
3964                break
3965            if result == 2:
3966                subparser = parsers[" ".join(this)]
3967                return subparser
3968        self._retreat(index)
3969        return None
3970
3971    def _match(self, token_type, advance=True):
3972        if not self._curr:
3973            return None
3974
3975        if self._curr.token_type == token_type:
3976            if advance:
3977                self._advance()
3978            return True
3979
3980        return None
3981
3982    def _match_set(self, types, advance=True):
3983        if not self._curr:
3984            return None
3985
3986        if self._curr.token_type in types:
3987            if advance:
3988                self._advance()
3989            return True
3990
3991        return None
3992
3993    def _match_pair(self, token_type_a, token_type_b, advance=True):
3994        if not self._curr or not self._next:
3995            return None
3996
3997        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3998            if advance:
3999                self._advance(2)
4000            return True
4001
4002        return None
4003
4004    def _match_l_paren(self, expression=None):
4005        if not self._match(TokenType.L_PAREN):
4006            self.raise_error("Expecting (")
4007        if expression and self._prev_comments:
4008            expression.comments = self._prev_comments
4009
4010    def _match_r_paren(self, expression=None):
4011        if not self._match(TokenType.R_PAREN):
4012            self.raise_error("Expecting )")
4013        if expression and self._prev_comments:
4014            expression.comments = self._prev_comments
4015
4016    def _match_texts(self, texts, advance=True):
4017        if self._curr and self._curr.text.upper() in texts:
4018            if advance:
4019                self._advance()
4020            return True
4021        return False
4022
4023    def _match_text_seq(self, *texts, advance=True):
4024        index = self._index
4025        for text in texts:
4026            if self._curr and self._curr.text.upper() == text:
4027                self._advance()
4028            else:
4029                self._retreat(index)
4030                return False
4031
4032        if not advance:
4033            self._retreat(index)
4034
4035        return True
4036
4037    def _replace_columns_with_dots(self, this):
4038        if isinstance(this, exp.Dot):
4039            exp.replace_children(this, self._replace_columns_with_dots)
4040        elif isinstance(this, exp.Column):
4041            exp.replace_children(this, self._replace_columns_with_dots)
4042            table = this.args.get("table")
4043            this = (
4044                self.expression(exp.Dot, this=table, expression=this.this)
4045                if table
4046                else self.expression(exp.Var, this=this.name)
4047            )
4048        elif isinstance(this, exp.Identifier):
4049            this = self.expression(exp.Var, this=this.name)
4050        return this
4051
4052    def _replace_lambda(self, node, lambda_variables):
4053        if isinstance(node, exp.Column):
4054            if node.name in lambda_variables:
4055                return node.this
4056        return node
def parse_var_map(args):
    """Build an exp.VarMap from a flat list of alternating key/value expressions.

    This span was a documentation rendering of the function with original
    source line numbers fused into the code tokens; reconstructed here as
    valid Python matching the definition at the top of the file.
    """
    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable that builds a binary range expression of *expr_type*.

    This span was a documentation rendering with original source line numbers
    fused into the code tokens; reconstructed here as valid Python matching
    the definition at the top of the file.
    """
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
class Parser:
  52class Parser(metaclass=_Parser):
  53    """
  54    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  55    a parsed syntax tree.
  56
  57    Args:
  58        error_level: the desired error level.
  59            Default: ErrorLevel.RAISE
  60        error_message_context: determines the amount of context to capture from a
  61            query string when displaying the error message (in number of characters).
  62            Default: 50.
  63        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  64            Default: 0
  65        alias_post_tablesample: If the table alias comes after tablesample.
  66            Default: False
  67        max_errors: Maximum number of error messages to include in a raised ParseError.
  68            This is only relevant if error_level is ErrorLevel.RAISE.
  69            Default: 3
  70        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  71            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  72            Default: "nulls_are_small"
  73    """
  74
  75    FUNCTIONS: t.Dict[str, t.Callable] = {
  76        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  77        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  78            this=seq_get(args, 0),
  79            to=exp.DataType(this=exp.DataType.Type.TEXT),
  80        ),
  81        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  86            this=exp.Cast(
  87                this=seq_get(args, 0),
  88                to=exp.DataType(this=exp.DataType.Type.TEXT),
  89            ),
  90            start=exp.Literal.number(1),
  91            length=exp.Literal.number(10),
  92        ),
  93        "VAR_MAP": parse_var_map,
  94        "IFNULL": exp.Coalesce.from_arg_list,
  95    }
  96
  97    NO_PAREN_FUNCTIONS = {
  98        TokenType.CURRENT_DATE: exp.CurrentDate,
  99        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 100        TokenType.CURRENT_TIME: exp.CurrentTime,
 101        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 102    }
 103
 104    NESTED_TYPE_TOKENS = {
 105        TokenType.ARRAY,
 106        TokenType.MAP,
 107        TokenType.STRUCT,
 108        TokenType.NULLABLE,
 109    }
 110
 111    TYPE_TOKENS = {
 112        TokenType.BIT,
 113        TokenType.BOOLEAN,
 114        TokenType.TINYINT,
 115        TokenType.SMALLINT,
 116        TokenType.INT,
 117        TokenType.BIGINT,
 118        TokenType.FLOAT,
 119        TokenType.DOUBLE,
 120        TokenType.CHAR,
 121        TokenType.NCHAR,
 122        TokenType.VARCHAR,
 123        TokenType.NVARCHAR,
 124        TokenType.TEXT,
 125        TokenType.MEDIUMTEXT,
 126        TokenType.LONGTEXT,
 127        TokenType.MEDIUMBLOB,
 128        TokenType.LONGBLOB,
 129        TokenType.BINARY,
 130        TokenType.VARBINARY,
 131        TokenType.JSON,
 132        TokenType.JSONB,
 133        TokenType.INTERVAL,
 134        TokenType.TIME,
 135        TokenType.TIMESTAMP,
 136        TokenType.TIMESTAMPTZ,
 137        TokenType.TIMESTAMPLTZ,
 138        TokenType.DATETIME,
 139        TokenType.DATE,
 140        TokenType.DECIMAL,
 141        TokenType.UUID,
 142        TokenType.GEOGRAPHY,
 143        TokenType.GEOMETRY,
 144        TokenType.HLLSKETCH,
 145        TokenType.HSTORE,
 146        TokenType.PSEUDO_TYPE,
 147        TokenType.SUPER,
 148        TokenType.SERIAL,
 149        TokenType.SMALLSERIAL,
 150        TokenType.BIGSERIAL,
 151        TokenType.XML,
 152        TokenType.UNIQUEIDENTIFIER,
 153        TokenType.MONEY,
 154        TokenType.SMALLMONEY,
 155        TokenType.ROWVERSION,
 156        TokenType.IMAGE,
 157        TokenType.VARIANT,
 158        TokenType.OBJECT,
 159        TokenType.INET,
 160        *NESTED_TYPE_TOKENS,
 161    }
 162
 163    SUBQUERY_PREDICATES = {
 164        TokenType.ANY: exp.Any,
 165        TokenType.ALL: exp.All,
 166        TokenType.EXISTS: exp.Exists,
 167        TokenType.SOME: exp.Any,
 168    }
 169
 170    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 171
 172    DB_CREATABLES = {
 173        TokenType.DATABASE,
 174        TokenType.SCHEMA,
 175        TokenType.TABLE,
 176        TokenType.VIEW,
 177    }
 178
 179    CREATABLES = {
 180        TokenType.COLUMN,
 181        TokenType.FUNCTION,
 182        TokenType.INDEX,
 183        TokenType.PROCEDURE,
 184        *DB_CREATABLES,
 185    }
 186
 187    ID_VAR_TOKENS = {
 188        TokenType.VAR,
 189        TokenType.ANTI,
 190        TokenType.APPLY,
 191        TokenType.AUTO_INCREMENT,
 192        TokenType.BEGIN,
 193        TokenType.BOTH,
 194        TokenType.BUCKET,
 195        TokenType.CACHE,
 196        TokenType.CASCADE,
 197        TokenType.COLLATE,
 198        TokenType.COMMAND,
 199        TokenType.COMMENT,
 200        TokenType.COMMIT,
 201        TokenType.COMPOUND,
 202        TokenType.CONSTRAINT,
 203        TokenType.DEFAULT,
 204        TokenType.DELETE,
 205        TokenType.DESCRIBE,
 206        TokenType.DIV,
 207        TokenType.END,
 208        TokenType.EXECUTE,
 209        TokenType.ESCAPE,
 210        TokenType.FALSE,
 211        TokenType.FIRST,
 212        TokenType.FILTER,
 213        TokenType.FOLLOWING,
 214        TokenType.FORMAT,
 215        TokenType.IF,
 216        TokenType.ISNULL,
 217        TokenType.INTERVAL,
 218        TokenType.LAZY,
 219        TokenType.LEADING,
 220        TokenType.LEFT,
 221        TokenType.LOCAL,
 222        TokenType.MATERIALIZED,
 223        TokenType.MERGE,
 224        TokenType.NATURAL,
 225        TokenType.NEXT,
 226        TokenType.OFFSET,
 227        TokenType.ONLY,
 228        TokenType.OPTIONS,
 229        TokenType.ORDINALITY,
 230        TokenType.PERCENT,
 231        TokenType.PIVOT,
 232        TokenType.PRECEDING,
 233        TokenType.RANGE,
 234        TokenType.REFERENCES,
 235        TokenType.RIGHT,
 236        TokenType.ROW,
 237        TokenType.ROWS,
 238        TokenType.SEED,
 239        TokenType.SEMI,
 240        TokenType.SET,
 241        TokenType.SHOW,
 242        TokenType.SORTKEY,
 243        TokenType.TEMPORARY,
 244        TokenType.TOP,
 245        TokenType.TRAILING,
 246        TokenType.TRUE,
 247        TokenType.UNBOUNDED,
 248        TokenType.UNIQUE,
 249        TokenType.UNLOGGED,
 250        TokenType.UNPIVOT,
 251        TokenType.VOLATILE,
 252        TokenType.WINDOW,
 253        *CREATABLES,
 254        *SUBQUERY_PREDICATES,
 255        *TYPE_TOKENS,
 256        *NO_PAREN_FUNCTIONS,
 257    }
 258
 259    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 260        TokenType.APPLY,
 261        TokenType.LEFT,
 262        TokenType.NATURAL,
 263        TokenType.OFFSET,
 264        TokenType.RIGHT,
 265        TokenType.WINDOW,
 266    }
 267
 268    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 269
 270    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 271
 272    FUNC_TOKENS = {
 273        TokenType.COMMAND,
 274        TokenType.CURRENT_DATE,
 275        TokenType.CURRENT_DATETIME,
 276        TokenType.CURRENT_TIMESTAMP,
 277        TokenType.CURRENT_TIME,
 278        TokenType.FILTER,
 279        TokenType.FIRST,
 280        TokenType.FORMAT,
 281        TokenType.IDENTIFIER,
 282        TokenType.INDEX,
 283        TokenType.ISNULL,
 284        TokenType.ILIKE,
 285        TokenType.LIKE,
 286        TokenType.MERGE,
 287        TokenType.OFFSET,
 288        TokenType.PRIMARY_KEY,
 289        TokenType.REPLACE,
 290        TokenType.ROW,
 291        TokenType.UNNEST,
 292        TokenType.VAR,
 293        TokenType.LEFT,
 294        TokenType.RIGHT,
 295        TokenType.DATE,
 296        TokenType.DATETIME,
 297        TokenType.TABLE,
 298        TokenType.TIMESTAMP,
 299        TokenType.TIMESTAMPTZ,
 300        TokenType.WINDOW,
 301        *TYPE_TOKENS,
 302        *SUBQUERY_PREDICATES,
 303    }
 304
 305    CONJUNCTION = {
 306        TokenType.AND: exp.And,
 307        TokenType.OR: exp.Or,
 308    }
 309
 310    EQUALITY = {
 311        TokenType.EQ: exp.EQ,
 312        TokenType.NEQ: exp.NEQ,
 313        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 314    }
 315
 316    COMPARISON = {
 317        TokenType.GT: exp.GT,
 318        TokenType.GTE: exp.GTE,
 319        TokenType.LT: exp.LT,
 320        TokenType.LTE: exp.LTE,
 321    }
 322
 323    BITWISE = {
 324        TokenType.AMP: exp.BitwiseAnd,
 325        TokenType.CARET: exp.BitwiseXor,
 326        TokenType.PIPE: exp.BitwiseOr,
 327        TokenType.DPIPE: exp.DPipe,
 328    }
 329
 330    TERM = {
 331        TokenType.DASH: exp.Sub,
 332        TokenType.PLUS: exp.Add,
 333        TokenType.MOD: exp.Mod,
 334        TokenType.COLLATE: exp.Collate,
 335    }
 336
 337    FACTOR = {
 338        TokenType.DIV: exp.IntDiv,
 339        TokenType.LR_ARROW: exp.Distance,
 340        TokenType.SLASH: exp.Div,
 341        TokenType.STAR: exp.Mul,
 342    }
 343
 344    TIMESTAMPS = {
 345        TokenType.TIME,
 346        TokenType.TIMESTAMP,
 347        TokenType.TIMESTAMPTZ,
 348        TokenType.TIMESTAMPLTZ,
 349    }
 350
 351    SET_OPERATIONS = {
 352        TokenType.UNION,
 353        TokenType.INTERSECT,
 354        TokenType.EXCEPT,
 355    }
 356
 357    JOIN_SIDES = {
 358        TokenType.LEFT,
 359        TokenType.RIGHT,
 360        TokenType.FULL,
 361    }
 362
 363    JOIN_KINDS = {
 364        TokenType.INNER,
 365        TokenType.OUTER,
 366        TokenType.CROSS,
 367        TokenType.SEMI,
 368        TokenType.ANTI,
 369    }
 370
 371    LAMBDAS = {
 372        TokenType.ARROW: lambda self, expressions: self.expression(
 373            exp.Lambda,
 374            this=self._parse_conjunction().transform(
 375                self._replace_lambda, {node.name for node in expressions}
 376            ),
 377            expressions=expressions,
 378        ),
 379        TokenType.FARROW: lambda self, expressions: self.expression(
 380            exp.Kwarg,
 381            this=exp.Var(this=expressions[0].name),
 382            expression=self._parse_conjunction(),
 383        ),
 384    }
 385
 386    COLUMN_OPERATORS = {
 387        TokenType.DOT: None,
 388        TokenType.DCOLON: lambda self, this, to: self.expression(
 389            exp.Cast,
 390            this=this,
 391            to=to,
 392        ),
 393        TokenType.ARROW: lambda self, this, path: self.expression(
 394            exp.JSONExtract,
 395            this=this,
 396            expression=path,
 397        ),
 398        TokenType.DARROW: lambda self, this, path: self.expression(
 399            exp.JSONExtractScalar,
 400            this=this,
 401            expression=path,
 402        ),
 403        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 404            exp.JSONBExtract,
 405            this=this,
 406            expression=path,
 407        ),
 408        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 409            exp.JSONBExtractScalar,
 410            this=this,
 411            expression=path,
 412        ),
 413        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 414            exp.JSONBContains,
 415            this=this,
 416            expression=key,
 417        ),
 418    }
 419
 420    EXPRESSION_PARSERS = {
 421        exp.Column: lambda self: self._parse_column(),
 422        exp.DataType: lambda self: self._parse_types(),
 423        exp.From: lambda self: self._parse_from(),
 424        exp.Group: lambda self: self._parse_group(),
 425        exp.Identifier: lambda self: self._parse_id_var(),
 426        exp.Lateral: lambda self: self._parse_lateral(),
 427        exp.Join: lambda self: self._parse_join(),
 428        exp.Order: lambda self: self._parse_order(),
 429        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 430        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 431        exp.Lambda: lambda self: self._parse_lambda(),
 432        exp.Limit: lambda self: self._parse_limit(),
 433        exp.Offset: lambda self: self._parse_offset(),
 434        exp.TableAlias: lambda self: self._parse_table_alias(),
 435        exp.Table: lambda self: self._parse_table(),
 436        exp.Condition: lambda self: self._parse_conjunction(),
 437        exp.Expression: lambda self: self._parse_statement(),
 438        exp.Properties: lambda self: self._parse_properties(),
 439        exp.Where: lambda self: self._parse_where(),
 440        exp.Ordered: lambda self: self._parse_ordered(),
 441        exp.Having: lambda self: self._parse_having(),
 442        exp.With: lambda self: self._parse_with(),
 443        exp.Window: lambda self: self._parse_named_window(),
 444        exp.Qualify: lambda self: self._parse_qualify(),
 445        exp.Returning: lambda self: self._parse_returning(),
 446        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 447    }
 448
 449    STATEMENT_PARSERS = {
 450        TokenType.ALTER: lambda self: self._parse_alter(),
 451        TokenType.BEGIN: lambda self: self._parse_transaction(),
 452        TokenType.CACHE: lambda self: self._parse_cache(),
 453        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 454        TokenType.COMMENT: lambda self: self._parse_comment(),
 455        TokenType.CREATE: lambda self: self._parse_create(),
 456        TokenType.DELETE: lambda self: self._parse_delete(),
 457        TokenType.DESC: lambda self: self._parse_describe(),
 458        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 459        TokenType.DROP: lambda self: self._parse_drop(),
 460        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 461        TokenType.INSERT: lambda self: self._parse_insert(),
 462        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 463        TokenType.MERGE: lambda self: self._parse_merge(),
 464        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 465        TokenType.SET: lambda self: self._parse_set(),
 466        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 467        TokenType.UPDATE: lambda self: self._parse_update(),
 468        TokenType.USE: lambda self: self.expression(
 469            exp.Use,
 470            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 471            and exp.Var(this=self._prev.text),
 472            this=self._parse_table(schema=False),
 473        ),
 474    }
 475
 476    UNARY_PARSERS = {
 477        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 478        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 479        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 480        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 481    }
 482
 483    PRIMARY_PARSERS = {
 484        TokenType.STRING: lambda self, token: self.expression(
 485            exp.Literal, this=token.text, is_string=True
 486        ),
 487        TokenType.NUMBER: lambda self, token: self.expression(
 488            exp.Literal, this=token.text, is_string=False
 489        ),
 490        TokenType.STAR: lambda self, _: self.expression(
 491            exp.Star,
 492            **{"except": self._parse_except(), "replace": self._parse_replace()},
 493        ),
 494        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 495        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 496        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 497        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 498        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 499        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 500        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 501        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 502        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 503    }
 504
 505    PLACEHOLDER_PARSERS = {
 506        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 507        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 508        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 509        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 510        else None,
 511    }
 512
 513    RANGE_PARSERS = {
 514        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 515        TokenType.GLOB: binary_range_parser(exp.Glob),
 516        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 517        TokenType.IN: lambda self, this: self._parse_in(this),
 518        TokenType.IS: lambda self, this: self._parse_is(this),
 519        TokenType.LIKE: binary_range_parser(exp.Like),
 520        TokenType.ILIKE: binary_range_parser(exp.ILike),
 521        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 522        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 523        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 524    }
 525
 526    PROPERTY_PARSERS = {
 527        "AFTER": lambda self: self._parse_afterjournal(
 528            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 529        ),
 530        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 531        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 532        "BEFORE": lambda self: self._parse_journal(
 533            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 534        ),
 535        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 536        "CHARACTER SET": lambda self: self._parse_character_set(),
 537        "CHECKSUM": lambda self: self._parse_checksum(),
 538        "CLUSTER BY": lambda self: self.expression(
 539            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 540        ),
 541        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 542        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 543        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 544            default=self._prev.text.upper() == "DEFAULT"
 545        ),
 546        "DEFINER": lambda self: self._parse_definer(),
 547        "DETERMINISTIC": lambda self: self.expression(
 548            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 549        ),
 550        "DISTKEY": lambda self: self._parse_distkey(),
 551        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 552        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 553        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 554        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 555        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 556        "FREESPACE": lambda self: self._parse_freespace(),
 557        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 558        "IMMUTABLE": lambda self: self.expression(
 559            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 560        ),
 561        "JOURNAL": lambda self: self._parse_journal(
 562            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 563        ),
 564        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 565        "LIKE": lambda self: self._parse_create_like(),
 566        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 567        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 568        "LOCK": lambda self: self._parse_locking(),
 569        "LOCKING": lambda self: self._parse_locking(),
 570        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 571        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 572        "MAX": lambda self: self._parse_datablocksize(),
 573        "MAXIMUM": lambda self: self._parse_datablocksize(),
 574        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 575            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 576        ),
 577        "MIN": lambda self: self._parse_datablocksize(),
 578        "MINIMUM": lambda self: self._parse_datablocksize(),
 579        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 580        "NO": lambda self: self._parse_noprimaryindex(),
 581        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 582        "ON": lambda self: self._parse_oncommit(),
 583        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 584        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 585        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 586        "RETURNS": lambda self: self._parse_returns(),
 587        "ROW": lambda self: self._parse_row(),
 588        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 589        "SORTKEY": lambda self: self._parse_sortkey(),
 590        "STABLE": lambda self: self.expression(
 591            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
 592        ),
 593        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 594        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 595        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 596        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 597        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 598        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 599        "VOLATILE": lambda self: self.expression(
 600            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
 601        ),
 602        "WITH": lambda self: self._parse_with_property(),
 603    }
 604
 605    CONSTRAINT_PARSERS = {
 606        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 607        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 608        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 609        "CHARACTER SET": lambda self: self.expression(
 610            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 611        ),
 612        "CHECK": lambda self: self.expression(
 613            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 614        ),
 615        "COLLATE": lambda self: self.expression(
 616            exp.CollateColumnConstraint, this=self._parse_var()
 617        ),
 618        "COMMENT": lambda self: self.expression(
 619            exp.CommentColumnConstraint, this=self._parse_string()
 620        ),
 621        "COMPRESS": lambda self: self._parse_compress(),
 622        "DEFAULT": lambda self: self.expression(
 623            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 624        ),
 625        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 626        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 627        "FORMAT": lambda self: self.expression(
 628            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 629        ),
 630        "GENERATED": lambda self: self._parse_generated_as_identity(),
 631        "IDENTITY": lambda self: self._parse_auto_increment(),
 632        "INLINE": lambda self: self._parse_inline(),
 633        "LIKE": lambda self: self._parse_create_like(),
 634        "NOT": lambda self: self._parse_not_constraint(),
 635        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 636        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 637        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 638        "TITLE": lambda self: self.expression(
 639            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 640        ),
 641        "UNIQUE": lambda self: self._parse_unique(),
 642        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 643    }
 644
    # Handlers for the clause keyword that follows ALTER TABLE <name>.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint kinds that can appear in a schema without an explicit CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (e.g. CAST(x AS type)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for the trailing clauses of a query, keyed by the arg name they populate.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET <scope> assignment handlers plus SET TRANSACTION.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; feeds the SHOW-statement trie built in _Parser.__new__.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted BEGIN <kind> TRANSACTION modes.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Accepted SET TRANSACTION characteristic phrases.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict resolutions.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Tokens usable as a window alias; ROWS is excluded to avoid ambiguity
    # with the window frame clause (ROWS BETWEEN ...).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that may start an ALTER TABLE ... ADD constraint.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST is strict (errors on failure); dialects may override.
    STRICT_CAST = True

    # Whether CONVERT takes the target type as its first argument (dialect-dependent).
    CONVERT_TYPE_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 741
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store parser settings and initialize per-parse state via reset().

        NOTE(review): when ``error_level`` is None the effective default here is
        ``ErrorLevel.IMMEDIATE``, while the class docstring advertises RAISE —
        confirm which one is intended.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
 760
 761    def reset(self):
 762        self.sql = ""
 763        self.errors = []
 764        self._tokens = []
 765        self._index = 0
 766        self._curr = None
 767        self._next = None
 768        self._prev = None
 769        self._prev_comments = None
 770
 771    def parse(
 772        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 773    ) -> t.List[t.Optional[exp.Expression]]:
 774        """
 775        Parses a list of tokens and returns a list of syntax trees, one tree
 776        per parsed SQL statement.
 777
 778        Args:
 779            raw_tokens: the list of tokens.
 780            sql: the original SQL string, used to produce helpful debug messages.
 781
 782        Returns:
 783            The list of syntax trees.
 784        """
 785        return self._parse(
 786            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 787        )
 788
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if a requested type has no registered parser.
            ParseError: if every candidate type fails to parse.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type we were attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # All candidates failed; chain from the most recent failure.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
 822
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split *raw_tokens* on semicolons and run *parse_method* once per statement."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        # Each chunk holds the tokens of one statement; semicolons are dropped.
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open an empty final chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start before the first token; _advance() positions _curr on it.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 856
 857    def check_errors(self) -> None:
 858        """
 859        Logs or raises any found errors, depending on the chosen error level setting.
 860        """
 861        if self.error_level == ErrorLevel.WARN:
 862            for error in self.errors:
 863                logger.error(str(error))
 864        elif self.error_level == ErrorLevel.RAISE and self.errors:
 865            raise ParseError(
 866                concat_messages(self.errors, self.max_errors),
 867                errors=merge_errors(self.errors),
 868            )
 869
 870    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 871        """
 872        Appends an error in the list of recorded errors or raises it, depending on the chosen
 873        error level setting.
 874        """
 875        token = token or self._curr or self._prev or Token.string("")
 876        start = self._find_token(token)
 877        end = start + len(token.text)
 878        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 879        highlight = self.sql[start:end]
 880        end_context = self.sql[end : end + self.error_message_context]
 881
 882        error = ParseError.new(
 883            f"{message}. Line {token.line}, Col: {token.col}.\n"
 884            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 885            description=message,
 886            line=token.line,
 887            col=token.col,
 888            start_context=start_context,
 889            highlight=highlight,
 890            end_context=end_context,
 891        )
 892
 893        if self.error_level == ErrorLevel.IMMEDIATE:
 894            raise error
 895
 896        self.errors.append(error)
 897
 898    def expression(
 899        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 900    ) -> exp.Expression:
 901        """
 902        Creates a new, validated Expression.
 903
 904        Args:
 905            exp_class: the expression class to instantiate.
 906            comments: an optional list of comments to attach to the expression.
 907            kwargs: the arguments to set for the expression along with their respective values.
 908
 909        Returns:
 910            The target expression.
 911        """
 912        instance = exp_class(**kwargs)
 913        if self._prev_comments:
 914            instance.comments = self._prev_comments
 915            self._prev_comments = None
 916        if comments:
 917            instance.comments = comments
 918        self.validate_expression(instance)
 919        return instance
 920
 921    def validate_expression(
 922        self, expression: exp.Expression, args: t.Optional[t.List] = None
 923    ) -> None:
 924        """
 925        Validates an already instantiated expression, making sure that all its mandatory arguments
 926        are set.
 927
 928        Args:
 929            expression: the expression to validate.
 930            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 931        """
 932        if self.error_level == ErrorLevel.IGNORE:
 933            return
 934
 935        for error_message in expression.error_messages(args):
 936            self.raise_error(error_message)
 937
 938    def _find_sql(self, start: Token, end: Token) -> str:
 939        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 940
 941    def _find_token(self, token: Token) -> int:
 942        line = 1
 943        col = 1
 944        index = 0
 945
 946        while line < token.line or col < token.col:
 947            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 948                line += 1
 949                col = 1
 950            else:
 951                col += 1
 952            index += 1
 953
 954        return index
 955
 956    def _advance(self, times: int = 1) -> None:
 957        self._index += times
 958        self._curr = seq_get(self._tokens, self._index)
 959        self._next = seq_get(self._tokens, self._index + 1)
 960        if self._index > 0:
 961            self._prev = self._tokens[self._index - 1]
 962            self._prev_comments = self._prev.comments
 963        else:
 964            self._prev = None
 965            self._prev_comments = None
 966
 967    def _retreat(self, index: int) -> None:
 968        if index != self._index:
 969            self._advance(index - self._index)
 970
 971    def _parse_command(self) -> exp.Expression:
 972        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 973
 974    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
 975        start = self._prev
 976        exists = self._parse_exists() if allow_exists else None
 977
 978        self._match(TokenType.ON)
 979
 980        kind = self._match_set(self.CREATABLES) and self._prev
 981
 982        if not kind:
 983            return self._parse_as_command(start)
 984
 985        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
 986            this = self._parse_user_defined_function(kind=kind.token_type)
 987        elif kind.token_type == TokenType.TABLE:
 988            this = self._parse_table()
 989        elif kind.token_type == TokenType.COLUMN:
 990            this = self._parse_column()
 991        else:
 992            this = self._parse_id_var()
 993
 994        self._match(TokenType.IS)
 995
 996        return self.expression(
 997            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
 998        )
 999
1000    def _parse_statement(self) -> t.Optional[exp.Expression]:
1001        if self._curr is None:
1002            return None
1003
1004        if self._match_set(self.STATEMENT_PARSERS):
1005            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1006
1007        if self._match_set(Tokenizer.COMMANDS):
1008            return self._parse_command()
1009
1010        expression = self._parse_expression()
1011        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1012
1013        self._parse_query_modifiers(expression)
1014        return expression
1015
1016    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
1017        start = self._prev
1018        temporary = self._match(TokenType.TEMPORARY)
1019        materialized = self._match(TokenType.MATERIALIZED)
1020        kind = self._match_set(self.CREATABLES) and self._prev.text
1021        if not kind:
1022            if default_kind:
1023                kind = default_kind
1024            else:
1025                return self._parse_as_command(start)
1026
1027        return self.expression(
1028            exp.Drop,
1029            exists=self._parse_exists(),
1030            this=self._parse_table(schema=True),
1031            kind=kind,
1032            temporary=temporary,
1033            materialized=materialized,
1034            cascade=self._match(TokenType.CASCADE),
1035        )
1036
1037    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1038        return (
1039            self._match(TokenType.IF)
1040            and (not not_ or self._match(TokenType.NOT))
1041            and self._match(TokenType.EXISTS)
1042        )
1043
1044    def _parse_create(self) -> t.Optional[exp.Expression]:
1045        start = self._prev
1046        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1047            TokenType.OR, TokenType.REPLACE
1048        )
1049        unique = self._match(TokenType.UNIQUE)
1050        volatile = self._match(TokenType.VOLATILE)
1051
1052        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1053            self._match(TokenType.TABLE)
1054
1055        properties = None
1056        create_token = self._match_set(self.CREATABLES) and self._prev
1057
1058        if not create_token:
1059            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1060            create_token = self._match_set(self.CREATABLES) and self._prev
1061
1062            if not properties or not create_token:
1063                return self._parse_as_command(start)
1064
1065        exists = self._parse_exists(not_=True)
1066        this = None
1067        expression = None
1068        indexes = None
1069        no_schema_binding = None
1070        begin = None
1071
1072        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1073            this = self._parse_user_defined_function(kind=create_token.token_type)
1074            temp_properties = self._parse_properties()
1075            if properties and temp_properties:
1076                properties.expressions.extend(temp_properties.expressions)
1077            elif temp_properties:
1078                properties = temp_properties
1079
1080            self._match(TokenType.ALIAS)
1081            begin = self._match(TokenType.BEGIN)
1082            return_ = self._match_text_seq("RETURN")
1083            expression = self._parse_statement()
1084
1085            if return_:
1086                expression = self.expression(exp.Return, this=expression)
1087        elif create_token.token_type == TokenType.INDEX:
1088            this = self._parse_index()
1089        elif create_token.token_type in self.DB_CREATABLES:
1090            table_parts = self._parse_table_parts(schema=True)
1091
1092            # exp.Properties.Location.POST_NAME
1093            if self._match(TokenType.COMMA):
1094                temp_properties = self._parse_properties(before=True)
1095                if properties and temp_properties:
1096                    properties.expressions.extend(temp_properties.expressions)
1097                elif temp_properties:
1098                    properties = temp_properties
1099
1100            this = self._parse_schema(this=table_parts)
1101
1102            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1103            temp_properties = self._parse_properties()
1104            if properties and temp_properties:
1105                properties.expressions.extend(temp_properties.expressions)
1106            elif temp_properties:
1107                properties = temp_properties
1108
1109            self._match(TokenType.ALIAS)
1110
1111            # exp.Properties.Location.POST_ALIAS
1112            if not (
1113                self._match(TokenType.SELECT, advance=False)
1114                or self._match(TokenType.WITH, advance=False)
1115                or self._match(TokenType.L_PAREN, advance=False)
1116            ):
1117                temp_properties = self._parse_properties()
1118                if properties and temp_properties:
1119                    properties.expressions.extend(temp_properties.expressions)
1120                elif temp_properties:
1121                    properties = temp_properties
1122
1123            expression = self._parse_ddl_select()
1124
1125            if create_token.token_type == TokenType.TABLE:
1126                # exp.Properties.Location.POST_EXPRESSION
1127                temp_properties = self._parse_properties()
1128                if properties and temp_properties:
1129                    properties.expressions.extend(temp_properties.expressions)
1130                elif temp_properties:
1131                    properties = temp_properties
1132
1133                indexes = []
1134                while True:
1135                    index = self._parse_create_table_index()
1136
1137                    # exp.Properties.Location.POST_INDEX
1138                    if self._match(TokenType.PARTITION_BY, advance=False):
1139                        temp_properties = self._parse_properties()
1140                        if properties and temp_properties:
1141                            properties.expressions.extend(temp_properties.expressions)
1142                        elif temp_properties:
1143                            properties = temp_properties
1144
1145                    if not index:
1146                        break
1147                    else:
1148                        indexes.append(index)
1149            elif create_token.token_type == TokenType.VIEW:
1150                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1151                    no_schema_binding = True
1152
1153        return self.expression(
1154            exp.Create,
1155            this=this,
1156            kind=create_token.text,
1157            replace=replace,
1158            unique=unique,
1159            volatile=volatile,
1160            expression=expression,
1161            exists=exists,
1162            properties=properties,
1163            indexes=indexes,
1164            no_schema_binding=no_schema_binding,
1165            begin=begin,
1166        )
1167
1168    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1169        self._match(TokenType.COMMA)
1170
1171        # parsers look to _prev for no/dual/default, so need to consume first
1172        self._match_text_seq("NO")
1173        self._match_text_seq("DUAL")
1174        self._match_text_seq("DEFAULT")
1175
1176        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1177            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1178
1179        return None
1180
1181    def _parse_property(self) -> t.Optional[exp.Expression]:
1182        if self._match_texts(self.PROPERTY_PARSERS):
1183            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1184
1185        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1186            return self._parse_character_set(default=True)
1187
1188        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1189            return self._parse_sortkey(compound=True)
1190
1191        if self._match_text_seq("SQL", "SECURITY"):
1192            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1193
1194        assignment = self._match_pair(
1195            TokenType.VAR, TokenType.EQ, advance=False
1196        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1197
1198        if assignment:
1199            key = self._parse_var_or_string()
1200            self._match(TokenType.EQ)
1201            return self.expression(exp.Property, this=key, value=self._parse_column())
1202
1203        return None
1204
1205    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1206        self._match(TokenType.EQ)
1207        self._match(TokenType.ALIAS)
1208        return self.expression(
1209            exp_class,
1210            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1211        )
1212
1213    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1214        properties = []
1215
1216        while True:
1217            if before:
1218                identified_property = self._parse_property_before()
1219            else:
1220                identified_property = self._parse_property()
1221
1222            if not identified_property:
1223                break
1224            for p in ensure_collection(identified_property):
1225                properties.append(p)
1226
1227        if properties:
1228            return self.expression(exp.Properties, expressions=properties)
1229
1230        return None
1231
1232    def _parse_fallback(self, no=False) -> exp.Expression:
1233        self._match_text_seq("FALLBACK")
1234        return self.expression(
1235            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1236        )
1237
1238    def _parse_with_property(
1239        self,
1240    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1241        self._match(TokenType.WITH)
1242        if self._match(TokenType.L_PAREN, advance=False):
1243            return self._parse_wrapped_csv(self._parse_property)
1244
1245        if self._match_text_seq("JOURNAL"):
1246            return self._parse_withjournaltable()
1247
1248        if self._match_text_seq("DATA"):
1249            return self._parse_withdata(no=False)
1250        elif self._match_text_seq("NO", "DATA"):
1251            return self._parse_withdata(no=True)
1252
1253        if not self._next:
1254            return None
1255
1256        return self._parse_withisolatedloading()
1257
1258    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1259    def _parse_definer(self) -> t.Optional[exp.Expression]:
1260        self._match(TokenType.EQ)
1261
1262        user = self._parse_id_var()
1263        self._match(TokenType.PARAMETER)
1264        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1265
1266        if not user or not host:
1267            return None
1268
1269        return exp.DefinerProperty(this=f"{user}@{host}")
1270
1271    def _parse_withjournaltable(self) -> exp.Expression:
1272        self._match(TokenType.TABLE)
1273        self._match(TokenType.EQ)
1274        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1275
1276    def _parse_log(self, no=False) -> exp.Expression:
1277        self._match_text_seq("LOG")
1278        return self.expression(exp.LogProperty, no=no)
1279
1280    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1281        before = self._match_text_seq("BEFORE")
1282        self._match_text_seq("JOURNAL")
1283        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1284
1285    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1286        self._match_text_seq("NOT")
1287        self._match_text_seq("LOCAL")
1288        self._match_text_seq("AFTER", "JOURNAL")
1289        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1290
1291    def _parse_checksum(self) -> exp.Expression:
1292        self._match_text_seq("CHECKSUM")
1293        self._match(TokenType.EQ)
1294
1295        on = None
1296        if self._match(TokenType.ON):
1297            on = True
1298        elif self._match_text_seq("OFF"):
1299            on = False
1300        default = self._match(TokenType.DEFAULT)
1301
1302        return self.expression(
1303            exp.ChecksumProperty,
1304            on=on,
1305            default=default,
1306        )
1307
1308    def _parse_freespace(self) -> exp.Expression:
1309        self._match_text_seq("FREESPACE")
1310        self._match(TokenType.EQ)
1311        return self.expression(
1312            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1313        )
1314
1315    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1316        self._match_text_seq("MERGEBLOCKRATIO")
1317        if self._match(TokenType.EQ):
1318            return self.expression(
1319                exp.MergeBlockRatioProperty,
1320                this=self._parse_number(),
1321                percent=self._match(TokenType.PERCENT),
1322            )
1323        else:
1324            return self.expression(
1325                exp.MergeBlockRatioProperty,
1326                no=no,
1327                default=default,
1328            )
1329
1330    def _parse_datablocksize(self, default=None) -> exp.Expression:
1331        if default:
1332            self._match_text_seq("DATABLOCKSIZE")
1333            return self.expression(exp.DataBlocksizeProperty, default=True)
1334        elif self._match_texts(("MIN", "MINIMUM")):
1335            self._match_text_seq("DATABLOCKSIZE")
1336            return self.expression(exp.DataBlocksizeProperty, min=True)
1337        elif self._match_texts(("MAX", "MAXIMUM")):
1338            self._match_text_seq("DATABLOCKSIZE")
1339            return self.expression(exp.DataBlocksizeProperty, min=False)
1340
1341        self._match_text_seq("DATABLOCKSIZE")
1342        self._match(TokenType.EQ)
1343        size = self._parse_number()
1344        units = None
1345        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1346            units = self._prev.text
1347        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1348
1349    def _parse_blockcompression(self) -> exp.Expression:
1350        self._match_text_seq("BLOCKCOMPRESSION")
1351        self._match(TokenType.EQ)
1352        always = self._match_text_seq("ALWAYS")
1353        manual = self._match_text_seq("MANUAL")
1354        never = self._match_text_seq("NEVER")
1355        default = self._match_text_seq("DEFAULT")
1356        autotemp = None
1357        if self._match_text_seq("AUTOTEMP"):
1358            autotemp = self._parse_schema()
1359
1360        return self.expression(
1361            exp.BlockCompressionProperty,
1362            always=always,
1363            manual=manual,
1364            never=never,
1365            default=default,
1366            autotemp=autotemp,
1367        )
1368
1369    def _parse_withisolatedloading(self) -> exp.Expression:
1370        no = self._match_text_seq("NO")
1371        concurrent = self._match_text_seq("CONCURRENT")
1372        self._match_text_seq("ISOLATED", "LOADING")
1373        for_all = self._match_text_seq("FOR", "ALL")
1374        for_insert = self._match_text_seq("FOR", "INSERT")
1375        for_none = self._match_text_seq("FOR", "NONE")
1376        return self.expression(
1377            exp.IsolatedLoadingProperty,
1378            no=no,
1379            concurrent=concurrent,
1380            for_all=for_all,
1381            for_insert=for_insert,
1382            for_none=for_none,
1383        )
1384
1385    def _parse_locking(self) -> exp.Expression:
1386        if self._match(TokenType.TABLE):
1387            kind = "TABLE"
1388        elif self._match(TokenType.VIEW):
1389            kind = "VIEW"
1390        elif self._match(TokenType.ROW):
1391            kind = "ROW"
1392        elif self._match_text_seq("DATABASE"):
1393            kind = "DATABASE"
1394        else:
1395            kind = None
1396
1397        if kind in ("DATABASE", "TABLE", "VIEW"):
1398            this = self._parse_table_parts()
1399        else:
1400            this = None
1401
1402        if self._match(TokenType.FOR):
1403            for_or_in = "FOR"
1404        elif self._match(TokenType.IN):
1405            for_or_in = "IN"
1406        else:
1407            for_or_in = None
1408
1409        if self._match_text_seq("ACCESS"):
1410            lock_type = "ACCESS"
1411        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1412            lock_type = "EXCLUSIVE"
1413        elif self._match_text_seq("SHARE"):
1414            lock_type = "SHARE"
1415        elif self._match_text_seq("READ"):
1416            lock_type = "READ"
1417        elif self._match_text_seq("WRITE"):
1418            lock_type = "WRITE"
1419        elif self._match_text_seq("CHECKSUM"):
1420            lock_type = "CHECKSUM"
1421        else:
1422            lock_type = None
1423
1424        override = self._match_text_seq("OVERRIDE")
1425
1426        return self.expression(
1427            exp.LockingProperty,
1428            this=this,
1429            kind=kind,
1430            for_or_in=for_or_in,
1431            lock_type=lock_type,
1432            override=override,
1433        )
1434
1435    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1436        if self._match(TokenType.PARTITION_BY):
1437            return self._parse_csv(self._parse_conjunction)
1438        return []
1439
1440    def _parse_partitioned_by(self) -> exp.Expression:
1441        self._match(TokenType.EQ)
1442        return self.expression(
1443            exp.PartitionedByProperty,
1444            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1445        )
1446
1447    def _parse_withdata(self, no=False) -> exp.Expression:
1448        if self._match_text_seq("AND", "STATISTICS"):
1449            statistics = True
1450        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1451            statistics = False
1452        else:
1453            statistics = None
1454
1455        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1456
1457    def _parse_noprimaryindex(self) -> exp.Expression:
1458        self._match_text_seq("PRIMARY", "INDEX")
1459        return exp.NoPrimaryIndexProperty()
1460
1461    def _parse_oncommit(self) -> exp.Expression:
1462        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1463        return exp.OnCommitProperty()
1464
1465    def _parse_distkey(self) -> exp.Expression:
1466        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1467
1468    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1469        table = self._parse_table(schema=True)
1470        options = []
1471        while self._match_texts(("INCLUDING", "EXCLUDING")):
1472            this = self._prev.text.upper()
1473            id_var = self._parse_id_var()
1474
1475            if not id_var:
1476                return None
1477
1478            options.append(
1479                self.expression(
1480                    exp.Property,
1481                    this=this,
1482                    value=exp.Var(this=id_var.this.upper()),
1483                )
1484            )
1485        return self.expression(exp.LikeProperty, this=table, expressions=options)
1486
1487    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1488        return self.expression(
1489            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1490        )
1491
1492    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1493        self._match(TokenType.EQ)
1494        return self.expression(
1495            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1496        )
1497
1498    def _parse_returns(self) -> exp.Expression:
1499        value: t.Optional[exp.Expression]
1500        is_table = self._match(TokenType.TABLE)
1501
1502        if is_table:
1503            if self._match(TokenType.LT):
1504                value = self.expression(
1505                    exp.Schema,
1506                    this="TABLE",
1507                    expressions=self._parse_csv(self._parse_struct_kwargs),
1508                )
1509                if not self._match(TokenType.GT):
1510                    self.raise_error("Expecting >")
1511            else:
1512                value = self._parse_schema(exp.Var(this="TABLE"))
1513        else:
1514            value = self._parse_types()
1515
1516        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1517
    def _parse_temporary(self, global_: bool = False) -> exp.Expression:
        """Parse a TEMPORARY property; ``global_`` marks GLOBAL TEMPORARY."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1521
1522    def _parse_describe(self) -> exp.Expression:
1523        kind = self._match_set(self.CREATABLES) and self._prev.text
1524        this = self._parse_table()
1525
1526        return self.expression(exp.Describe, this=this, kind=kind)
1527
1528    def _parse_insert(self) -> exp.Expression:
1529        overwrite = self._match(TokenType.OVERWRITE)
1530        local = self._match(TokenType.LOCAL)
1531        alternative = None
1532
1533        if self._match_text_seq("DIRECTORY"):
1534            this: t.Optional[exp.Expression] = self.expression(
1535                exp.Directory,
1536                this=self._parse_var_or_string(),
1537                local=local,
1538                row_format=self._parse_row_format(match_row=True),
1539            )
1540        else:
1541            if self._match(TokenType.OR):
1542                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1543
1544            self._match(TokenType.INTO)
1545            self._match(TokenType.TABLE)
1546            this = self._parse_table(schema=True)
1547
1548        return self.expression(
1549            exp.Insert,
1550            this=this,
1551            exists=self._parse_exists(),
1552            partition=self._parse_partition(),
1553            expression=self._parse_ddl_select(),
1554            returning=self._parse_returning(),
1555            overwrite=overwrite,
1556            alternative=alternative,
1557        )
1558
1559    def _parse_returning(self) -> t.Optional[exp.Expression]:
1560        if not self._match(TokenType.RETURNING):
1561            return None
1562
1563        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1564
1565    def _parse_row(self) -> t.Optional[exp.Expression]:
1566        if not self._match(TokenType.FORMAT):
1567            return None
1568        return self._parse_row_format()
1569
1570    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1571        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1572            return None
1573
1574        if self._match_text_seq("SERDE"):
1575            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1576
1577        self._match_text_seq("DELIMITED")
1578
1579        kwargs = {}
1580
1581        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1582            kwargs["fields"] = self._parse_string()
1583            if self._match_text_seq("ESCAPED", "BY"):
1584                kwargs["escaped"] = self._parse_string()
1585        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1586            kwargs["collection_items"] = self._parse_string()
1587        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1588            kwargs["map_keys"] = self._parse_string()
1589        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1590            kwargs["lines"] = self._parse_string()
1591        if self._match_text_seq("NULL", "DEFINED", "AS"):
1592            kwargs["null"] = self._parse_string()
1593
1594        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1595
1596    def _parse_load_data(self) -> exp.Expression:
1597        local = self._match(TokenType.LOCAL)
1598        self._match_text_seq("INPATH")
1599        inpath = self._parse_string()
1600        overwrite = self._match(TokenType.OVERWRITE)
1601        self._match_pair(TokenType.INTO, TokenType.TABLE)
1602
1603        return self.expression(
1604            exp.LoadData,
1605            this=self._parse_table(schema=True),
1606            local=local,
1607            overwrite=overwrite,
1608            inpath=inpath,
1609            partition=self._parse_partition(),
1610            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1611            serde=self._match_text_seq("SERDE") and self._parse_string(),
1612        )
1613
1614    def _parse_delete(self) -> exp.Expression:
1615        self._match(TokenType.FROM)
1616
1617        return self.expression(
1618            exp.Delete,
1619            this=self._parse_table(schema=True),
1620            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1621            where=self._parse_where(),
1622            returning=self._parse_returning(),
1623        )
1624
1625    def _parse_update(self) -> exp.Expression:
1626        return self.expression(
1627            exp.Update,
1628            **{  # type: ignore
1629                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1630                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1631                "from": self._parse_from(),
1632                "where": self._parse_where(),
1633                "returning": self._parse_returning(),
1634            },
1635        )
1636
1637    def _parse_uncache(self) -> exp.Expression:
1638        if not self._match(TokenType.TABLE):
1639            self.raise_error("Expecting TABLE after UNCACHE")
1640
1641        return self.expression(
1642            exp.Uncache,
1643            exists=self._parse_exists(),
1644            this=self._parse_table(schema=True),
1645        )
1646
1647    def _parse_cache(self) -> exp.Expression:
1648        lazy = self._match(TokenType.LAZY)
1649        self._match(TokenType.TABLE)
1650        table = self._parse_table(schema=True)
1651        options = []
1652
1653        if self._match(TokenType.OPTIONS):
1654            self._match_l_paren()
1655            k = self._parse_string()
1656            self._match(TokenType.EQ)
1657            v = self._parse_string()
1658            options = [k, v]
1659            self._match_r_paren()
1660
1661        self._match(TokenType.ALIAS)
1662        return self.expression(
1663            exp.Cache,
1664            this=table,
1665            lazy=lazy,
1666            options=options,
1667            expression=self._parse_select(nested=True),
1668        )
1669
1670    def _parse_partition(self) -> t.Optional[exp.Expression]:
1671        if not self._match(TokenType.PARTITION):
1672            return None
1673
1674        return self.expression(
1675            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1676        )
1677
1678    def _parse_value(self) -> exp.Expression:
1679        if self._match(TokenType.L_PAREN):
1680            expressions = self._parse_csv(self._parse_conjunction)
1681            self._match_r_paren()
1682            return self.expression(exp.Tuple, expressions=expressions)
1683
1684        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1685        # Source: https://prestodb.io/docs/current/sql/values.html
1686        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1687
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Tries, in order: a WITH-prefixed statement, a SELECT, a parenthesized
        nested select/table (only when `nested`/`table` allow it), a VALUES
        clause; otherwise yields None to `_parse_set_operations`.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a table expression inside parentheses.
            parse_subquery_alias: whether to parse an alias on a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list (e.g. T-SQL).
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1761
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse an optional WITH clause into an exp.With node.

        Args:
            skip_with_token: assume the WITH token was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither a comma nor another WITH follows. When a comma
            # matched (short-circuit skips the WITH check), the else branch
            # also tolerates a redundant WITH before the next CTE.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1778
1779    def _parse_cte(self) -> exp.Expression:
1780        alias = self._parse_table_alias()
1781        if not alias or not alias.this:
1782            self.raise_error("Expected CTE to have alias")
1783
1784        self._match(TokenType.ALIAS)
1785
1786        return self.expression(
1787            exp.CTE,
1788            this=self._parse_wrapped(self._parse_statement),
1789            alias=alias,
1790        )
1791
1792    def _parse_table_alias(
1793        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1794    ) -> t.Optional[exp.Expression]:
1795        any_token = self._match(TokenType.ALIAS)
1796        alias = (
1797            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1798            or self._parse_string_as_identifier()
1799        )
1800
1801        index = self._index
1802        if self._match(TokenType.L_PAREN):
1803            columns = self._parse_csv(self._parse_function_parameter)
1804            self._match_r_paren() if columns else self._retreat(index)
1805        else:
1806            columns = None
1807
1808        if not alias and not columns:
1809            return None
1810
1811        return self.expression(exp.TableAlias, this=alias, columns=columns)
1812
1813    def _parse_subquery(
1814        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1815    ) -> exp.Expression:
1816        return self.expression(
1817            exp.Subquery,
1818            this=this,
1819            pivots=self._parse_pivots(),
1820            alias=self._parse_table_alias() if parse_alias else None,
1821        )
1822
1823    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
1824        if not isinstance(this, self.MODIFIABLES):
1825            return
1826
1827        table = isinstance(this, exp.Table)
1828
1829        while True:
1830            lateral = self._parse_lateral()
1831            join = self._parse_join()
1832            comma = None if table else self._match(TokenType.COMMA)
1833            if lateral:
1834                this.append("laterals", lateral)
1835            if join:
1836                this.append("joins", join)
1837            if comma:
1838                this.args["from"].append("expressions", self._parse_table())
1839            if not (lateral or join or comma):
1840                break
1841
1842        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1843            expression = parser(self)
1844
1845            if expression:
1846                this.set(key, expression)
1847
1848    def _parse_hint(self) -> t.Optional[exp.Expression]:
1849        if self._match(TokenType.HINT):
1850            hints = self._parse_csv(self._parse_function)
1851            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1852                self.raise_error("Expected */ after HINT")
1853            return self.expression(exp.Hint, expressions=hints)
1854
1855        return None
1856
1857    def _parse_into(self) -> t.Optional[exp.Expression]:
1858        if not self._match(TokenType.INTO):
1859            return None
1860
1861        temp = self._match(TokenType.TEMPORARY)
1862        unlogged = self._match(TokenType.UNLOGGED)
1863        self._match(TokenType.TABLE)
1864
1865        return self.expression(
1866            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1867        )
1868
1869    def _parse_from(self) -> t.Optional[exp.Expression]:
1870        if not self._match(TokenType.FROM):
1871            return None
1872
1873        return self.expression(
1874            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1875        )
1876
1877    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1878        if not self._match(TokenType.MATCH_RECOGNIZE):
1879            return None
1880        self._match_l_paren()
1881
1882        partition = self._parse_partition_by()
1883        order = self._parse_order()
1884        measures = (
1885            self._parse_alias(self._parse_conjunction())
1886            if self._match_text_seq("MEASURES")
1887            else None
1888        )
1889
1890        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1891            rows = exp.Var(this="ONE ROW PER MATCH")
1892        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1893            text = "ALL ROWS PER MATCH"
1894            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1895                text += f" SHOW EMPTY MATCHES"
1896            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1897                text += f" OMIT EMPTY MATCHES"
1898            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1899                text += f" WITH UNMATCHED ROWS"
1900            rows = exp.Var(this=text)
1901        else:
1902            rows = None
1903
1904        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1905            text = "AFTER MATCH SKIP"
1906            if self._match_text_seq("PAST", "LAST", "ROW"):
1907                text += f" PAST LAST ROW"
1908            elif self._match_text_seq("TO", "NEXT", "ROW"):
1909                text += f" TO NEXT ROW"
1910            elif self._match_text_seq("TO", "FIRST"):
1911                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1912            elif self._match_text_seq("TO", "LAST"):
1913                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1914            after = exp.Var(this=text)
1915        else:
1916            after = None
1917
1918        if self._match_text_seq("PATTERN"):
1919            self._match_l_paren()
1920
1921            if not self._curr:
1922                self.raise_error("Expecting )", self._curr)
1923
1924            paren = 1
1925            start = self._curr
1926
1927            while self._curr and paren > 0:
1928                if self._curr.token_type == TokenType.L_PAREN:
1929                    paren += 1
1930                if self._curr.token_type == TokenType.R_PAREN:
1931                    paren -= 1
1932                end = self._prev
1933                self._advance()
1934            if paren > 0:
1935                self.raise_error("Expecting )", self._curr)
1936            pattern = exp.Var(this=self._find_sql(start, end))
1937        else:
1938            pattern = None
1939
1940        define = (
1941            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1942        )
1943        self._match_r_paren()
1944
1945        return self.expression(
1946            exp.MatchRecognize,
1947            partition_by=partition,
1948            order=order,
1949            measures=measures,
1950            rows=rows,
1951            after=after,
1952            pattern=pattern,
1953            define=define,
1954        )
1955
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse a LATERAL subquery/function or an OUTER/CROSS APPLY clause.

        Returns an exp.Lateral node, an exp.Join wrapping it for the APPLY
        variants, or None when no lateral construct is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call
            # or identifier, e.g. LATERAL VIEW explode(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW (Hive): alias is `table AS col1, col2, ...`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY is modeled as a join: CROSS APPLY has no side, OUTER APPLY is LEFT.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
2000
2001    def _parse_join_side_and_kind(
2002        self,
2003    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2004        return (
2005            self._match(TokenType.NATURAL) and self._prev,
2006            self._match_set(self.JOIN_SIDES) and self._prev,
2007            self._match_set(self.JOIN_KINDS) and self._prev,
2008        )
2009
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse an optional JOIN clause.

        Consumes NATURAL/side/kind prefixes, requires a JOIN token unless
        `skip_join_token` is True, then parses the joined table and an
        optional ON condition or USING column list.
        """
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            # NOTE(review): any NATURAL/side/kind tokens consumed above are not
            # rolled back on this path — confirm it is unreachable for valid SQL.
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2033
2034    def _parse_index(self) -> exp.Expression:
2035        index = self._parse_id_var()
2036        self._match(TokenType.ON)
2037        self._match(TokenType.TABLE)  # hive
2038
2039        return self.expression(
2040            exp.Index,
2041            this=index,
2042            table=self.expression(exp.Table, this=self._parse_id_var()),
2043            columns=self._parse_expression(),
2044        )
2045
2046    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2047        unique = self._match(TokenType.UNIQUE)
2048        primary = self._match_text_seq("PRIMARY")
2049        amp = self._match_text_seq("AMP")
2050        if not self._match(TokenType.INDEX):
2051            return None
2052        index = self._parse_id_var()
2053        columns = None
2054        if self._match(TokenType.L_PAREN, advance=False):
2055            columns = self._parse_wrapped_csv(self._parse_column)
2056        return self.expression(
2057            exp.Index,
2058            this=index,
2059            columns=columns,
2060            unique=unique,
2061            primary=primary,
2062            amp=amp,
2063        )
2064
2065    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2066        catalog = None
2067        db = None
2068
2069        table = (
2070            (not schema and self._parse_function())
2071            or self._parse_id_var(any_token=False)
2072            or self._parse_string_as_identifier()
2073        )
2074
2075        while self._match(TokenType.DOT):
2076            if catalog:
2077                # This allows nesting the table in arbitrarily many dot expressions if needed
2078                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
2079            else:
2080                catalog = db
2081                db = table
2082                table = self._parse_id_var()
2083
2084        if not table:
2085            self.raise_error(f"Expected table name but got {self._curr}")
2086
2087        return self.expression(
2088            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2089        )
2090
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: a lateral, UNNEST, VALUES, subquery, or a
        plain (possibly aliased/sampled/pivoted/hinted) table reference.

        Args:
            schema: parse the table as a schema target (e.g. for DDL).
            alias_tokens: token types permitted as the table alias.
        """
        # Each of these alternatives short-circuits the plain-table path.
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: TABLESAMPLE may come before or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # Table hints of the form WITH (hint1, hint2, ...).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2147
2148    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2149        if not self._match(TokenType.UNNEST):
2150            return None
2151
2152        expressions = self._parse_wrapped_csv(self._parse_column)
2153        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2154        alias = self._parse_table_alias()
2155
2156        if alias and self.unnest_column_only:
2157            if alias.args.get("columns"):
2158                self.raise_error("Unexpected extra column alias in unnest.")
2159            alias.set("columns", [alias.this])
2160            alias.set("this", None)
2161
2162        offset = None
2163        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2164            self._match(TokenType.ALIAS)
2165            offset = self._parse_conjunction()
2166
2167        return self.expression(
2168            exp.Unnest,
2169            expressions=expressions,
2170            ordinality=ordinality,
2171            alias=alias,
2172            offset=offset,
2173        )
2174
2175    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2176        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2177        if not is_derived and not self._match(TokenType.VALUES):
2178            return None
2179
2180        expressions = self._parse_csv(self._parse_value)
2181
2182        if is_derived:
2183            self._match_r_paren()
2184
2185        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2186
2187    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2188        if not self._match(TokenType.TABLE_SAMPLE) and not (
2189            as_modifier and self._match_text_seq("USING", "SAMPLE")
2190        ):
2191            return None
2192
2193        bucket_numerator = None
2194        bucket_denominator = None
2195        bucket_field = None
2196        percent = None
2197        rows = None
2198        size = None
2199        seed = None
2200
2201        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2202        method = self._parse_var(tokens=(TokenType.ROW,))
2203
2204        self._match(TokenType.L_PAREN)
2205
2206        num = self._parse_number()
2207
2208        if self._match(TokenType.BUCKET):
2209            bucket_numerator = self._parse_number()
2210            self._match(TokenType.OUT_OF)
2211            bucket_denominator = bucket_denominator = self._parse_number()
2212            self._match(TokenType.ON)
2213            bucket_field = self._parse_field()
2214        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2215            percent = num
2216        elif self._match(TokenType.ROWS):
2217            rows = num
2218        else:
2219            size = num
2220
2221        self._match(TokenType.R_PAREN)
2222
2223        if self._match(TokenType.L_PAREN):
2224            method = self._parse_var()
2225            seed = self._match(TokenType.COMMA) and self._parse_number()
2226            self._match_r_paren()
2227        elif self._match_texts(("SEED", "REPEATABLE")):
2228            seed = self._parse_wrapped(self._parse_number)
2229
2230        return self.expression(
2231            exp.TableSample,
2232            method=method,
2233            bucket_numerator=bucket_numerator,
2234            bucket_denominator=bucket_denominator,
2235            bucket_field=bucket_field,
2236            percent=percent,
2237            rows=rows,
2238            size=size,
2239            seed=seed,
2240            kind=kind,
2241        )
2242
2243    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2244        return list(iter(self._parse_pivot, None))
2245
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, or return None (after retreating)
        when the keyword is not followed by an opening parenthesis."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword was something else (e.g. an identifier); rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last PIVOT/UNPIVOT in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2286
2287    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2288        if not skip_where_token and not self._match(TokenType.WHERE):
2289            return None
2290
2291        return self.expression(
2292            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2293        )
2294
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING
        SETS, and [WITH] ROLLUP / [WITH] CUBE modifiers into one exp.Group."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep looping so mixed forms (expressions followed by CUBE(...), etc.)
        # are all collected; stop once a full pass matches nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # `WITH ROLLUP` / `WITH CUBE` take no column list (stored as True);
            # bare ROLLUP/CUBE carry a parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2326
2327    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2328        if not self._match(TokenType.GROUPING_SETS):
2329            return None
2330
2331        return self._parse_wrapped_csv(self._parse_grouping_set)
2332
2333    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2334        if self._match(TokenType.L_PAREN):
2335            grouping_set = self._parse_csv(self._parse_column)
2336            self._match_r_paren()
2337            return self.expression(exp.Tuple, expressions=grouping_set)
2338
2339        return self._parse_column()
2340
2341    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2342        if not skip_having_token and not self._match(TokenType.HAVING):
2343            return None
2344        return self.expression(exp.Having, this=self._parse_conjunction())
2345
2346    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2347        if not self._match(TokenType.QUALIFY):
2348            return None
2349        return self.expression(exp.Qualify, this=self._parse_conjunction())
2350
2351    def _parse_order(
2352        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2353    ) -> t.Optional[exp.Expression]:
2354        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2355            return this
2356
2357        return self.expression(
2358            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2359        )
2360
2361    def _parse_sort(
2362        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2363    ) -> t.Optional[exp.Expression]:
2364        if not self._match(token_type):
2365            return None
2366        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2367
2368    def _parse_ordered(self) -> exp.Expression:
2369        this = self._parse_conjunction()
2370        self._match(TokenType.ASC)
2371        is_desc = self._match(TokenType.DESC)
2372        is_nulls_first = self._match(TokenType.NULLS_FIRST)
2373        is_nulls_last = self._match(TokenType.NULLS_LAST)
2374        desc = is_desc or False
2375        asc = not desc
2376        nulls_first = is_nulls_first or False
2377        explicitly_null_ordered = is_nulls_first or is_nulls_last
2378        if (
2379            not explicitly_null_ordered
2380            and (
2381                (asc and self.null_ordering == "nulls_are_small")
2382                or (desc and self.null_ordering != "nulls_are_small")
2383            )
2384            and self.null_ordering != "nulls_are_last"
2385        ):
2386            nulls_first = True
2387
2388        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2389
2390    def _parse_limit(
2391        self, this: t.Optional[exp.Expression] = None, top: bool = False
2392    ) -> t.Optional[exp.Expression]:
2393        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2394            limit_paren = self._match(TokenType.L_PAREN)
2395            limit_exp = self.expression(
2396                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2397            )
2398
2399            if limit_paren:
2400                self._match_r_paren()
2401
2402            return limit_exp
2403
2404        if self._match(TokenType.FETCH):
2405            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2406            direction = self._prev.text if direction else "FIRST"
2407            count = self._parse_number()
2408            self._match_set((TokenType.ROW, TokenType.ROWS))
2409            self._match(TokenType.ONLY)
2410            return self.expression(exp.Fetch, direction=direction, count=count)
2411
2412        return this
2413
2414    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2415        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2416            return this
2417
2418        count = self._parse_number()
2419        self._match_set((TokenType.ROW, TokenType.ROWS))
2420        return self.expression(exp.Offset, this=this, expression=count)
2421
2422    def _parse_lock(self) -> t.Optional[exp.Expression]:
2423        if self._match_text_seq("FOR", "UPDATE"):
2424            return self.expression(exp.Lock, update=True)
2425        if self._match_text_seq("FOR", "SHARE"):
2426            return self.expression(exp.Lock, update=False)
2427
2428        return None
2429
2430    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2431        if not self._match_set(self.SET_OPERATIONS):
2432            return this
2433
2434        token_type = self._prev.token_type
2435
2436        if token_type == TokenType.UNION:
2437            expression = exp.Union
2438        elif token_type == TokenType.EXCEPT:
2439            expression = exp.Except
2440        else:
2441            expression = exp.Intersect
2442
2443        return self.expression(
2444            expression,
2445            this=this,
2446            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2447            expression=self._parse_set_operations(self._parse_select(nested=True)),
2448        )
2449
2450    def _parse_expression(self) -> t.Optional[exp.Expression]:
2451        return self._parse_alias(self._parse_conjunction())
2452
2453    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2454        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2455
2456    def _parse_equality(self) -> t.Optional[exp.Expression]:
2457        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2458
2459    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2460        return self._parse_tokens(self._parse_range, self.COMPARISON)
2461
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates (the RANGE_PARSERS table), the
        ISNULL/NOTNULL shorthands, and a trailing IS predicate."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # A NOT consumed above negates whatever predicate was just parsed.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2484
2485    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2486        negate = self._match(TokenType.NOT)
2487        if self._match(TokenType.DISTINCT_FROM):
2488            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2489            return self.expression(klass, this=this, expression=self._parse_expression())
2490
2491        this = self.expression(
2492            exp.Is,
2493            this=this,
2494            expression=self._parse_null() or self._parse_boolean(),
2495        )
2496        return self.expression(exp.Not, this=this) if negate else this
2497
2498    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2499        unnest = self._parse_unnest()
2500        if unnest:
2501            this = self.expression(exp.In, this=this, unnest=unnest)
2502        elif self._match(TokenType.L_PAREN):
2503            expressions = self._parse_csv(self._parse_select_or_expression)
2504
2505            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2506                this = self.expression(exp.In, this=this, query=expressions[0])
2507            else:
2508                this = self.expression(exp.In, this=this, expressions=expressions)
2509
2510            self._match_r_paren()
2511        else:
2512            this = self.expression(exp.In, this=this, field=self._parse_field())
2513
2514        return this
2515
2516    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2517        low = self._parse_bitwise()
2518        self._match(TokenType.AND)
2519        high = self._parse_bitwise()
2520        return self.expression(exp.Between, this=this, low=low, high=high)
2521
2522    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2523        if not self._match(TokenType.ESCAPE):
2524            return this
2525        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2526
2527    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2528        this = self._parse_term()
2529
2530        while True:
2531            if self._match_set(self.BITWISE):
2532                this = self.expression(
2533                    self.BITWISE[self._prev.token_type],
2534                    this=this,
2535                    expression=self._parse_term(),
2536                )
2537            elif self._match_pair(TokenType.LT, TokenType.LT):
2538                this = self.expression(
2539                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2540                )
2541            elif self._match_pair(TokenType.GT, TokenType.GT):
2542                this = self.expression(
2543                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2544                )
2545            else:
2546                break
2547
2548        return this
2549
2550    def _parse_term(self) -> t.Optional[exp.Expression]:
2551        return self._parse_tokens(self._parse_factor, self.TERM)
2552
2553    def _parse_factor(self) -> t.Optional[exp.Expression]:
2554        return self._parse_tokens(self._parse_unary, self.FACTOR)
2555
2556    def _parse_unary(self) -> t.Optional[exp.Expression]:
2557        if self._match_set(self.UNARY_PARSERS):
2558            return self.UNARY_PARSERS[self._prev.token_type](self)
2559        return self._parse_at_time_zone(self._parse_type())
2560
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, a `TYPE literal` cast form, or fall
        through to a plain column expression."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. `DATE '2020-01-01'` becomes a cast of the literal.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type name not followed by a literal was actually a
                # column reference: rewind and reparse it as one.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2578
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Returns None (after rewinding) when the tokens do not form a type.
        When `check_func` is True, a parenthesized form that could also be a
        function call is only kept as a type if a string literal follows.
        """
        index = self._index

        # Optional SYSUDTLIB schema prefix (dialect-specific qualified types).
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parentheses: not a type after all, rewind.
                self._retreat(index)
                return None

            self._match_r_paren()
            # `TYPE(...)` could equally be a function call; decided below.
            maybe_func = True

        # Bracket suffixes build array types: INT[] / INT[][] / ...
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            # Each additional `[]` wraps the type in another ARRAY level.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` (no matching `]` pair) means this wasn't a type; rewind.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types: ARRAY<INT>, STRUCT<a: INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing literal values, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize TIME/TIMESTAMP [WITH [LOCAL] TIME ZONE] variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone modifier rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal: treat it as a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2692
2693    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2694        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2695            return self._parse_types()
2696
2697        this = self._parse_id_var()
2698        self._match(TokenType.COLON)
2699        data_type = self._parse_types()
2700
2701        if not data_type:
2702            return None
2703        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2704
2705    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2706        if not self._match(TokenType.AT_TIME_ZONE):
2707            return this
2708        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2709
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column reference, handling the
        COLUMN_OPERATORS table (including the `::` cast operator)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast operator: the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take the next token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot access: the next part is a star, call, or identifier.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers down one level: the previous column name
                # becomes the table, the table becomes the db, and so on.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2758
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a parenthesized expression,
        tuple, or subquery; returns None when nothing matches."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated ('a' 'b' -> 'ab').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Leading-dot decimal literal: `.5` parses as 0.5.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                # A parenthesized query may be followed by set operations.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve comments attached to the opening parenthesis.
            if this and comments:
                this.comments = comments

            return this

        return None
2805
2806    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2807        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2808
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (known or anonymous), a no-paren function, or
        a subquery predicate, followed by an optional window specification.

        Args:
            functions: optional name -> builder mapping overriding self.FUNCTIONS.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only NO_PAREN_FUNCTIONS keywords qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS (SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown name: keep it as an anonymous function call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2870
2871    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
2872        return self._parse_column_def(self._parse_id_var())
2873
2874    def _parse_user_defined_function(
2875        self, kind: t.Optional[TokenType] = None
2876    ) -> t.Optional[exp.Expression]:
2877        this = self._parse_id_var()
2878
2879        while self._match(TokenType.DOT):
2880            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2881
2882        if not self._match(TokenType.L_PAREN):
2883            return this
2884
2885        expressions = self._parse_csv(self._parse_function_parameter)
2886        self._match_r_paren()
2887        return self.expression(
2888            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2889        )
2890
2891    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2892        literal = self._parse_primary()
2893        if literal:
2894            return self.expression(exp.Introducer, this=token.text, expression=literal)
2895
2896        return self.expression(exp.Identifier, this=token.text)
2897
2898    def _parse_national(self, token: Token) -> exp.Expression:
2899        return self.expression(exp.National, this=exp.Literal.string(token.text))
2900
2901    def _parse_session_parameter(self) -> exp.Expression:
2902        kind = None
2903        this = self._parse_id_var() or self._parse_primary()
2904
2905        if this and self._match(TokenType.DOT):
2906            kind = this.name
2907            this = self._parse_var() or self._parse_primary()
2908
2909        return self.expression(exp.SessionParameter, this=this, kind=kind)
2910
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `x -> ...` or `(x, y) -> ...`);
        when no lambda operator follows the candidate parameter list, rewind
        and parse a DISTINCT list or a plain expression instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        # Function arguments may carry trailing ORDER BY / LIMIT modifiers.
        return self._parse_limit(self._parse_order(this))
2942
2943    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2944        index = self._index
2945        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2946            self._retreat(index)
2947            return this
2948
2949        args = self._parse_csv(
2950            lambda: self._parse_constraint()
2951            or self._parse_column_def(self._parse_field(any_token=True))
2952        )
2953        self._match_r_paren()
2954        return self.expression(exp.Schema, this=this, expressions=args)
2955
2956    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2957        kind = self._parse_types()
2958
2959        if self._match_text_seq("FOR", "ORDINALITY"):
2960            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2961
2962        constraints = []
2963        while True:
2964            constraint = self._parse_column_constraint()
2965            if not constraint:
2966                break
2967            constraints.append(constraint)
2968
2969        if not kind and not constraints:
2970            return this
2971
2972        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2973
2974    def _parse_auto_increment(self) -> exp.Expression:
2975        start = None
2976        increment = None
2977
2978        if self._match(TokenType.L_PAREN, advance=False):
2979            args = self._parse_wrapped_csv(self._parse_bitwise)
2980            start = seq_get(args, 0)
2981            increment = seq_get(args, 1)
2982        elif self._match_text_seq("START"):
2983            start = self._parse_bitwise()
2984            self._match_text_seq("INCREMENT")
2985            increment = self._parse_bitwise()
2986
2987        if start and increment:
2988            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2989
2990        return exp.AutoIncrementColumnConstraint()
2991
2992    def _parse_compress(self) -> exp.Expression:
2993        if self._match(TokenType.L_PAREN, advance=False):
2994            return self.expression(
2995                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2996            )
2997
2998        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2999
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY plus its optional
        sequence options; `this` is True for ALWAYS, False for BY DEFAULT."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options, each optional, consumed in this fixed order.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3026
3027    def _parse_inline(self) -> t.Optional[exp.Expression]:
3028        self._match_text_seq("LENGTH")
3029        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3030
3031    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3032        if self._match_text_seq("NULL"):
3033            return self.expression(exp.NotNullColumnConstraint)
3034        if self._match_text_seq("CASESPECIFIC"):
3035            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3036        return None
3037
3038    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3039        this = self._parse_references()
3040        if this:
3041            return this
3042
3043        if self._match(TokenType.CONSTRAINT):
3044            this = self._parse_id_var()
3045
3046        if self._match_texts(self.CONSTRAINT_PARSERS):
3047            return self.expression(
3048                exp.ColumnConstraint,
3049                this=this,
3050                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3051            )
3052
3053        return this
3054
3055    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3056        if not self._match(TokenType.CONSTRAINT):
3057            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3058
3059        this = self._parse_id_var()
3060        expressions = []
3061
3062        while True:
3063            constraint = self._parse_unnamed_constraint() or self._parse_function()
3064            if not constraint:
3065                break
3066            expressions.append(constraint)
3067
3068        return self.expression(exp.Constraint, this=this, expressions=expressions)
3069
3070    def _parse_unnamed_constraint(
3071        self, constraints: t.Optional[t.Collection[str]] = None
3072    ) -> t.Optional[exp.Expression]:
3073        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3074            return None
3075
3076        constraint = self._prev.text.upper()
3077        if constraint not in self.CONSTRAINT_PARSERS:
3078            self.raise_error(f"No parser found for schema constraint {constraint}.")
3079
3080        return self.CONSTRAINT_PARSERS[constraint](self)
3081
3082    def _parse_unique(self) -> exp.Expression:
3083        if not self._match(TokenType.L_PAREN, advance=False):
3084            return self.expression(exp.UniqueColumnConstraint)
3085        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3086
3087    def _parse_key_constraint_options(self) -> t.List[str]:
3088        options = []
3089        while True:
3090            if not self._curr:
3091                break
3092
3093            if self._match(TokenType.ON):
3094                action = None
3095                on = self._advance_any() and self._prev.text
3096
3097                if self._match(TokenType.NO_ACTION):
3098                    action = "NO ACTION"
3099                elif self._match(TokenType.CASCADE):
3100                    action = "CASCADE"
3101                elif self._match_pair(TokenType.SET, TokenType.NULL):
3102                    action = "SET NULL"
3103                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3104                    action = "SET DEFAULT"
3105                else:
3106                    self.raise_error("Invalid key constraint")
3107
3108                options.append(f"ON {on} {action}")
3109            elif self._match_text_seq("NOT", "ENFORCED"):
3110                options.append("NOT ENFORCED")
3111            elif self._match_text_seq("DEFERRABLE"):
3112                options.append("DEFERRABLE")
3113            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3114                options.append("INITIALLY DEFERRED")
3115            elif self._match_text_seq("NORELY"):
3116                options.append("NORELY")
3117            elif self._match_text_seq("MATCH", "FULL"):
3118                options.append("MATCH FULL")
3119            else:
3120                break
3121
3122        return options
3123
3124    def _parse_references(self) -> t.Optional[exp.Expression]:
3125        if not self._match(TokenType.REFERENCES):
3126            return None
3127
3128        expressions = None
3129        this = self._parse_id_var()
3130
3131        if self._match(TokenType.L_PAREN, advance=False):
3132            expressions = self._parse_wrapped_id_vars()
3133
3134        options = self._parse_key_constraint_options()
3135        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3136
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY body: (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        # Each ON clause maps its event ("delete"/"update") to an action string,
        # passed through as keyword args on the ForeignKey node below.
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to consuming a single token as the action (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3162
3163    def _parse_primary_key(self) -> exp.Expression:
3164        desc = (
3165            self._match_set((TokenType.ASC, TokenType.DESC))
3166            and self._prev.token_type == TokenType.DESC
3167        )
3168
3169        if not self._match(TokenType.L_PAREN, advance=False):
3170            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3171
3172        expressions = self._parse_wrapped_id_vars()
3173        options = self._parse_key_constraint_options()
3174        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3175
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix after `this`: `[...]` (subscript/array) or `{...}` (struct).

        Recurses at the end so chained subscripts like x[0][1] are all consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is a start-less slice, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect-specific index bases against the parser's offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)
3204
3205    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3206        if self._match(TokenType.COLON):
3207            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3208        return this
3209
3210    def _parse_case(self) -> t.Optional[exp.Expression]:
3211        ifs = []
3212        default = None
3213
3214        expression = self._parse_conjunction()
3215
3216        while self._match(TokenType.WHEN):
3217            this = self._parse_conjunction()
3218            self._match(TokenType.THEN)
3219            then = self._parse_conjunction()
3220            ifs.append(self.expression(exp.If, this=this, true=then))
3221
3222        if self._match(TokenType.ELSE):
3223            default = self._parse_conjunction()
3224
3225        if not self._match(TokenType.END):
3226            self.raise_error("Expected END after CASE", self._prev)
3227
3228        return self._parse_window(
3229            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3230        )
3231
3232    def _parse_if(self) -> t.Optional[exp.Expression]:
3233        if self._match(TokenType.L_PAREN):
3234            args = self._parse_csv(self._parse_conjunction)
3235            this = exp.If.from_arg_list(args)
3236            self.validate_expression(this, args)
3237            self._match_r_paren()
3238        else:
3239            condition = self._parse_conjunction()
3240            self._match(TokenType.THEN)
3241            true = self._parse_conjunction()
3242            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3243            self._match(TokenType.END)
3244            this = self.expression(exp.If, this=condition, true=true, false=false)
3245
3246        return self._parse_window(this)
3247
3248    def _parse_extract(self) -> exp.Expression:
3249        this = self._parse_function() or self._parse_var() or self._parse_type()
3250
3251        if self._match(TokenType.FROM):
3252            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3253
3254        if not self._match(TokenType.COMMA):
3255            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3256
3257        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3258
3259    def _parse_cast(self, strict: bool) -> exp.Expression:
3260        this = self._parse_conjunction()
3261
3262        if not self._match(TokenType.ALIAS):
3263            self.raise_error("Expected AS after CAST")
3264
3265        to = self._parse_types()
3266
3267        if not to:
3268            self.raise_error("Expected TYPE after CAST")
3269        elif to.this == exp.DataType.Type.CHAR:
3270            if self._match(TokenType.CHARACTER_SET):
3271                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3272
3273        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3274
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into a GroupConcat expression.

        Handles DISTINCT, an optional trailing ORDER BY (Postgres style) and the
        WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Remember this position so we can rewind if WITHIN GROUP doesn't follow.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3303
3304    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3305        to: t.Optional[exp.Expression]
3306        this = self._parse_bitwise()
3307
3308        if self._match(TokenType.USING):
3309            to = self.expression(exp.CharacterSet, this=self._parse_var())
3310        elif self._match(TokenType.COMMA):
3311            to = self._parse_bitwise()
3312        else:
3313            to = None
3314
3315        # Swap the argument order if needed to produce the correct AST
3316        if self.CONVERT_TYPE_FIRST:
3317            this, to = to, this
3318
3319        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3320
3321    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3322        args = self._parse_csv(self._parse_bitwise)
3323
3324        if self._match(TokenType.IN):
3325            return self.expression(
3326                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3327            )
3328
3329        if haystack_first:
3330            haystack = seq_get(args, 0)
3331            needle = seq_get(args, 1)
3332        else:
3333            needle = seq_get(args, 0)
3334            haystack = seq_get(args, 1)
3335
3336        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3337
3338        self.validate_expression(this, args)
3339
3340        return this
3341
3342    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3343        args = self._parse_csv(self._parse_table)
3344        return exp.JoinHint(this=func_name.upper(), expressions=args)
3345
3346    def _parse_substring(self) -> exp.Expression:
3347        # Postgres supports the form: substring(string [from int] [for int])
3348        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3349
3350        args = self._parse_csv(self._parse_bitwise)
3351
3352        if self._match(TokenType.FROM):
3353            args.append(self._parse_bitwise())
3354            if self._match(TokenType.FOR):
3355                args.append(self._parse_bitwise())
3356
3357        this = exp.Substring.from_arg_list(args)
3358        self.validate_expression(this, args)
3359
3360        return this
3361
3362    def _parse_trim(self) -> exp.Expression:
3363        # https://www.w3resource.com/sql/character-functions/trim.php
3364        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3365
3366        position = None
3367        collation = None
3368
3369        if self._match_set(self.TRIM_TYPES):
3370            position = self._prev.text.upper()
3371
3372        expression = self._parse_term()
3373        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3374            this = self._parse_term()
3375        else:
3376            this = expression
3377            expression = None
3378
3379        if self._match(TokenType.COLLATE):
3380            collation = self._parse_term()
3381
3382        return self.expression(
3383            exp.Trim,
3384            this=this,
3385            position=position,
3386            expression=expression,
3387            collation=collation,
3388        )
3389
    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause as a list of named windows, if one follows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3392
3393    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3394        return self._parse_window(self._parse_id_var(), alias=True)
3395
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...).

        With `alias=True`, parses a named window definition (<name> AS (<spec>))
        instead of requiring OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window, no inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3469
3470    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3471        self._match(TokenType.BETWEEN)
3472
3473        return {
3474            "value": (
3475                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3476            )
3477            or self._parse_bitwise(),
3478            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3479        }
3480
3481    def _parse_alias(
3482        self, this: t.Optional[exp.Expression], explicit: bool = False
3483    ) -> t.Optional[exp.Expression]:
3484        any_token = self._match(TokenType.ALIAS)
3485
3486        if explicit and not any_token:
3487            return this
3488
3489        if self._match(TokenType.L_PAREN):
3490            aliases = self.expression(
3491                exp.Aliases,
3492                this=this,
3493                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3494            )
3495            self._match_r_paren(aliases)
3496            return aliases
3497
3498        alias = self._parse_id_var(any_token)
3499
3500        if alias:
3501            return self.expression(exp.Alias, this=this, alias=alias)
3502
3503        return this
3504
3505    def _parse_id_var(
3506        self,
3507        any_token: bool = True,
3508        tokens: t.Optional[t.Collection[TokenType]] = None,
3509        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3510    ) -> t.Optional[exp.Expression]:
3511        identifier = self._parse_identifier()
3512
3513        if identifier:
3514            return identifier
3515
3516        prefix = ""
3517
3518        if prefix_tokens:
3519            while self._match_set(prefix_tokens):
3520                prefix += self._prev.text
3521
3522        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3523            quoted = self._prev.token_type == TokenType.STRING
3524            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3525
3526        return None
3527
3528    def _parse_string(self) -> t.Optional[exp.Expression]:
3529        if self._match(TokenType.STRING):
3530            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3531        return self._parse_placeholder()
3532
3533    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3534        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3535
3536    def _parse_number(self) -> t.Optional[exp.Expression]:
3537        if self._match(TokenType.NUMBER):
3538            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3539        return self._parse_placeholder()
3540
3541    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3542        if self._match(TokenType.IDENTIFIER):
3543            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3544        return self._parse_placeholder()
3545
3546    def _parse_var(
3547        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3548    ) -> t.Optional[exp.Expression]:
3549        if (
3550            (any_token and self._advance_any())
3551            or self._match(TokenType.VAR)
3552            or (self._match_set(tokens) if tokens else False)
3553        ):
3554            return self.expression(exp.Var, this=self._prev.text)
3555        return self._parse_placeholder()
3556
3557    def _advance_any(self) -> t.Optional[Token]:
3558        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3559            self._advance()
3560            return self._prev
3561        return None
3562
3563    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3564        return self._parse_var() or self._parse_string()
3565
3566    def _parse_null(self) -> t.Optional[exp.Expression]:
3567        if self._match(TokenType.NULL):
3568            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3569        return None
3570
3571    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3572        if self._match(TokenType.TRUE):
3573            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3574        if self._match(TokenType.FALSE):
3575            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3576        return None
3577
3578    def _parse_star(self) -> t.Optional[exp.Expression]:
3579        if self._match(TokenType.STAR):
3580            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3581        return None
3582
3583    def _parse_parameter(self) -> exp.Expression:
3584        wrapped = self._match(TokenType.L_BRACE)
3585        this = self._parse_var() or self._parse_primary()
3586        self._match(TokenType.R_BRACE)
3587        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3588
3589    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3590        if self._match_set(self.PLACEHOLDER_PARSERS):
3591            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3592            if placeholder:
3593                return placeholder
3594            self._advance(-1)
3595        return None
3596
3597    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3598        if not self._match(TokenType.EXCEPT):
3599            return None
3600        if self._match(TokenType.L_PAREN, advance=False):
3601            return self._parse_wrapped_csv(self._parse_column)
3602        return self._parse_csv(self._parse_column)
3603
3604    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3605        if not self._match(TokenType.REPLACE):
3606            return None
3607        if self._match(TokenType.L_PAREN, advance=False):
3608            return self._parse_wrapped_csv(self._parse_expression)
3609        return self._parse_csv(self._parse_expression)
3610
3611    def _parse_csv(
3612        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3613    ) -> t.List[t.Optional[exp.Expression]]:
3614        parse_result = parse_method()
3615        items = [parse_result] if parse_result is not None else []
3616
3617        while self._match(sep):
3618            if parse_result and self._prev_comments:
3619                parse_result.comments = self._prev_comments
3620
3621            parse_result = parse_method()
3622            if parse_result is not None:
3623                items.append(parse_result)
3624
3625        return items
3626
3627    def _parse_tokens(
3628        self, parse_method: t.Callable, expressions: t.Dict
3629    ) -> t.Optional[exp.Expression]:
3630        this = parse_method()
3631
3632        while self._match_set(expressions):
3633            this = self.expression(
3634                expressions[self._prev.token_type],
3635                this=this,
3636                comments=self._prev_comments,
3637                expression=parse_method(),
3638            )
3639
3640        return this
3641
3642    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3643        return self._parse_wrapped_csv(self._parse_id_var)
3644
3645    def _parse_wrapped_csv(
3646        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3647    ) -> t.List[t.Optional[exp.Expression]]:
3648        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3649
3650    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3651        self._match_l_paren()
3652        parse_result = parse_method()
3653        self._match_r_paren()
3654        return parse_result
3655
3656    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3657        return self._parse_select() or self._parse_expression()
3658
3659    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3660        return self._parse_set_operations(
3661            self._parse_select(nested=True, parse_subquery_alias=False)
3662        )
3663
3664    def _parse_transaction(self) -> exp.Expression:
3665        this = None
3666        if self._match_texts(self.TRANSACTION_KIND):
3667            this = self._prev.text
3668
3669        self._match_texts({"TRANSACTION", "WORK"})
3670
3671        modes = []
3672        while True:
3673            mode = []
3674            while self._match(TokenType.VAR):
3675                mode.append(self._prev.text)
3676
3677            if mode:
3678                modes.append(" ".join(mode))
3679            if not self._match(TokenType.COMMA):
3680                break
3681
3682        return self.expression(exp.Transaction, this=this, modes=modes)
3683
3684    def _parse_commit_or_rollback(self) -> exp.Expression:
3685        chain = None
3686        savepoint = None
3687        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3688
3689        self._match_texts({"TRANSACTION", "WORK"})
3690
3691        if self._match_text_seq("TO"):
3692            self._match_text_seq("SAVEPOINT")
3693            savepoint = self._parse_id_var()
3694
3695        if self._match(TokenType.AND):
3696            chain = not self._match_text_seq("NO")
3697            self._match_text_seq("CHAIN")
3698
3699        if is_rollback:
3700            return self.expression(exp.Rollback, savepoint=savepoint)
3701        return self.expression(exp.Commit, chain=chain)
3702
3703    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3704        if not self._match_text_seq("ADD"):
3705            return None
3706
3707        self._match(TokenType.COLUMN)
3708        exists_column = self._parse_exists(not_=True)
3709        expression = self._parse_column_def(self._parse_field(any_token=True))
3710
3711        if expression:
3712            expression.set("exists", exists_column)
3713
3714        return expression
3715
3716    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3717        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3718
3719    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3720    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3721        return self.expression(
3722            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3723        )
3724
3725    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3726        this = None
3727        kind = self._prev.token_type
3728
3729        if kind == TokenType.CONSTRAINT:
3730            this = self._parse_id_var()
3731
3732            if self._match_text_seq("CHECK"):
3733                expression = self._parse_wrapped(self._parse_conjunction)
3734                enforced = self._match_text_seq("ENFORCED")
3735
3736                return self.expression(
3737                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3738                )
3739
3740        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3741            expression = self._parse_foreign_key()
3742        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3743            expression = self._parse_primary_key()
3744
3745        return self.expression(exp.AddConstraint, this=this, expression=expression)
3746
3747    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
3748        index = self._index - 1
3749
3750        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
3751            return self._parse_csv(self._parse_add_constraint)
3752
3753        self._retreat(index)
3754        return self._parse_csv(self._parse_add_column)
3755
3756    def _parse_alter_table_alter(self) -> exp.Expression:
3757        self._match(TokenType.COLUMN)
3758        column = self._parse_field(any_token=True)
3759
3760        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
3761            return self.expression(exp.AlterColumn, this=column, drop=True)
3762        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
3763            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
3764
3765        self._match_text_seq("SET", "DATA")
3766        return self.expression(
3767            exp.AlterColumn,
3768            this=column,
3769            dtype=self._match_text_seq("TYPE") and self._parse_types(),
3770            collate=self._match(TokenType.COLLATE) and self._parse_term(),
3771            using=self._match(TokenType.USING) and self._parse_conjunction(),
3772        )
3773
3774    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
3775        index = self._index - 1
3776
3777        partition_exists = self._parse_exists()
3778        if self._match(TokenType.PARTITION, advance=False):
3779            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
3780
3781        self._retreat(index)
3782        return self._parse_csv(self._parse_drop_column)
3783
3784    def _parse_alter_table_rename(self) -> exp.Expression:
3785        self._match_text_seq("TO")
3786        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
3787
3788    def _parse_alter(self) -> t.Optional[exp.Expression]:
3789        start = self._prev
3790
3791        if not self._match(TokenType.TABLE):
3792            return self._parse_as_command(start)
3793
3794        exists = self._parse_exists()
3795        this = self._parse_table(schema=True)
3796
3797        if self._next:
3798            self._advance()
3799        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
3800
3801        if parser:
3802            return self.expression(
3803                exp.AlterTable,
3804                this=this,
3805                exists=exists,
3806                actions=ensure_list(parser(self)),
3807            )
3808        return self._parse_as_command(start)
3809
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... statement with its WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <condition>] THEN <action>
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or when neither
            # qualifier is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # Either INSERT * or INSERT (columns) VALUES (expressions).
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # Either UPDATE * or UPDATE SET col = expr, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # Unrecognized action: recorded as an empty THEN.
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
3875
3876    def _parse_show(self) -> t.Optional[exp.Expression]:
3877        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3878        if parser:
3879            return parser(self)
3880        self._advance()
3881        return self.expression(exp.Show, this=self._prev.text.upper())
3882
3883    def _parse_set_item_assignment(
3884        self, kind: t.Optional[str] = None
3885    ) -> t.Optional[exp.Expression]:
3886        index = self._index
3887
3888        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
3889            return self._parse_set_transaction(global_=kind == "GLOBAL")
3890
3891        left = self._parse_primary() or self._parse_id_var()
3892
3893        if not self._match_texts(("=", "TO")):
3894            self._retreat(index)
3895            return None
3896
3897        right = self._parse_statement() or self._parse_id_var()
3898        this = self.expression(
3899            exp.EQ,
3900            this=left,
3901            expression=right,
3902        )
3903
3904        return self.expression(
3905            exp.SetItem,
3906            this=this,
3907            kind=kind,
3908        )
3909
3910    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
3911        self._match_text_seq("TRANSACTION")
3912        characteristics = self._parse_csv(
3913            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
3914        )
3915        return self.expression(
3916            exp.SetItem,
3917            expressions=characteristics,
3918            kind="TRANSACTION",
3919            **{"global": global_},  # type: ignore
3920        )
3921
3922    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3923        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3924        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
3925
3926    def _parse_set(self) -> exp.Expression:
3927        index = self._index
3928        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3929
3930        if self._curr:
3931            self._retreat(index)
3932            return self._parse_as_command(self._prev)
3933
3934        return set_
3935
3936    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
3937        for option in options:
3938            if self._match_text_seq(*option.split(" ")):
3939                return exp.Var(this=option)
3940        return None
3941
3942    def _parse_as_command(self, start: Token) -> exp.Command:
3943        while self._curr:
3944            self._advance()
3945        text = self._find_sql(start, self._prev)
3946        size = len(start.text)
3947        return exp.Command(this=text[:size], expression=text[size:])
3948
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over the upcoming token texts and return the matching
        entry from `parsers`, or None (rewinding the stream) when no key matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No registered key starts with the words consumed so far.
                break
            if result == 2:
                # Exact match: the consumed words form a full parser key.
                subparser = parsers[" ".join(this)]
                return subparser
        # No match — restore the token position before giving up.
        self._retreat(index)
        return None
3971
3972    def _match(self, token_type, advance=True):
3973        if not self._curr:
3974            return None
3975
3976        if self._curr.token_type == token_type:
3977            if advance:
3978                self._advance()
3979            return True
3980
3981        return None
3982
3983    def _match_set(self, types, advance=True):
3984        if not self._curr:
3985            return None
3986
3987        if self._curr.token_type in types:
3988            if advance:
3989                self._advance()
3990            return True
3991
3992        return None
3993
3994    def _match_pair(self, token_type_a, token_type_b, advance=True):
3995        if not self._curr or not self._next:
3996            return None
3997
3998        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3999            if advance:
4000                self._advance(2)
4001            return True
4002
4003        return None
4004
4005    def _match_l_paren(self, expression=None):
4006        if not self._match(TokenType.L_PAREN):
4007            self.raise_error("Expecting (")
4008        if expression and self._prev_comments:
4009            expression.comments = self._prev_comments
4010
4011    def _match_r_paren(self, expression=None):
4012        if not self._match(TokenType.R_PAREN):
4013            self.raise_error("Expecting )")
4014        if expression and self._prev_comments:
4015            expression.comments = self._prev_comments
4016
4017    def _match_texts(self, texts, advance=True):
4018        if self._curr and self._curr.text.upper() in texts:
4019            if advance:
4020                self._advance()
4021            return True
4022        return False
4023
4024    def _match_text_seq(self, *texts, advance=True):
4025        index = self._index
4026        for text in texts:
4027            if self._curr and self._curr.text.upper() == text:
4028                self._advance()
4029            else:
4030                self._retreat(index)
4031                return False
4032
4033        if not advance:
4034            self._retreat(index)
4035
4036        return True
4037
    def _replace_columns_with_dots(self, this):
        """Rewrite Column/Identifier nodes into Dot/Var chains, recursing into
        Dot and Column children first."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # Qualified column -> Dot(table, name); bare column -> Var(name).
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4052
4053    def _replace_lambda(self, node, lambda_variables):
4054        if isinstance(node, exp.Column):
4055            if node.name in lambda_variables:
4056                return node.this
4057        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0.
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring advertises ErrorLevel.RAISE as the
        # default, but the effective default here is IMMEDIATE — confirm which
        # is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Initialize all per-parse mutable state.
        self.reset()
def reset(self):
    def reset(self):
        # Clear all state accumulated by a previous parse so this instance can
        # be reused for another token list.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        # The unbound class attribute is passed (rather than the bound method)
        # so that subclasses dispatch to their own `_parse_statement`.
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
789    def parse_into(
790        self,
791        expression_types: exp.IntoType,
792        raw_tokens: t.List[Token],
793        sql: t.Optional[str] = None,
794    ) -> t.List[t.Optional[exp.Expression]]:
795        """
796        Parses a list of tokens into a given Expression type. If a collection of Expression
797        types is given instead, this method will try to parse the token list into each one
798        of them, stopping at the first for which the parsing succeeds.
799
800        Args:
801            expression_types: the expression type(s) to try and parse the token list into.
802            raw_tokens: the list of tokens.
803            sql: the original SQL string, used to produce helpful debug messages.
804
805        Returns:
806            The target Expression.
807        """
808        errors = []
809        for expression_type in ensure_collection(expression_types):
810            parser = self.EXPRESSION_PARSERS.get(expression_type)
811            if not parser:
812                raise TypeError(f"No parser registered for {expression_type}")
813            try:
814                return self._parse(parser, raw_tokens, sql)
815            except ParseError as e:
816                e.errors[0]["into_expression"] = expression_type
817                errors.append(e)
818        raise ParseError(
819            f"Failed to parse into {expression_types}",
820            errors=merge_errors(errors),
821        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
857    def check_errors(self) -> None:
858        """
859        Logs or raises any found errors, depending on the chosen error level setting.
860        """
861        if self.error_level == ErrorLevel.WARN:
862            for error in self.errors:
863                logger.error(str(error))
864        elif self.error_level == ErrorLevel.RAISE and self.errors:
865            raise ParseError(
866                concat_messages(self.errors, self.max_errors),
867                errors=merge_errors(self.errors),
868            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error on the most relevant token available.
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        # Surrounding SQL is captured for the message, bounded by
        # error_message_context characters on each side.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending SQL in ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[sqlglot.expressions.Expression], comments: Optional[List[str]] = None, **kwargs) -> sqlglot.expressions.Expression:
    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Pending comments collected while tokenizing attach to the next
        # expression built; an explicit `comments` argument takes precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
921    def validate_expression(
922        self, expression: exp.Expression, args: t.Optional[t.List] = None
923    ) -> None:
924        """
925        Validates an already instantiated expression, making sure that all its mandatory arguments
926        are set.
927
928        Args:
929            expression: the expression to validate.
930            args: an optional list of items that was used to instantiate the expression, if it's a Func.
931        """
932        if self.error_level == ErrorLevel.IGNORE:
933            return
934
935        for error_message in expression.error_messages(args):
936            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.