Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import (
  10    apply_index_offset,
  11    count_params,
  12    ensure_collection,
  13    ensure_list,
  14    seq_get,
  15)
  16from sqlglot.tokens import Token, Tokenizer, TokenType
  17from sqlglot.trie import in_trie, new_trie
  18
  19logger = logging.getLogger("sqlglot")
  20
  21E = t.TypeVar("E", bound=exp.Expression)
  22
  23
  24def parse_var_map(args: t.Sequence) -> exp.Expression:
  25    if len(args) == 1 and args[0].is_star:
  26        return exp.StarMap(this=args[0])
  27
  28    keys = []
  29    values = []
  30    for i in range(0, len(args), 2):
  31        keys.append(args[i])
  32        values.append(args[i + 1])
  33    return exp.VarMap(
  34        keys=exp.Array(expressions=keys),
  35        values=exp.Array(expressions=values),
  36    )
  37
  38
  39def parse_like(args):
  40    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  41    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  42
  43
  44def binary_range_parser(
  45    expr_type: t.Type[exp.Expression],
  46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  47    return lambda self, this: self._parse_escape(
  48        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  49    )
  50
  51
  52class _Parser(type):
  53    def __new__(cls, clsname, bases, attrs):
  54        klass = super().__new__(cls, clsname, bases, attrs)
  55        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  56        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  57
  58        return klass
  59
  60
  61class Parser(metaclass=_Parser):
  62    """
  63    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  64    a parsed syntax tree.
  65
  66    Args:
  67        error_level: the desired error level.
   68            Default: ErrorLevel.IMMEDIATE
  69        error_message_context: determines the amount of context to capture from a
  70            query string when displaying the error message (in number of characters).
   71            Default: 100.
  72        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  73            Default: 0
  74        alias_post_tablesample: If the table alias comes after tablesample.
  75            Default: False
  76        max_errors: Maximum number of error messages to include in a raised ParseError.
  77            This is only relevant if error_level is ErrorLevel.RAISE.
  78            Default: 3
  79        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  80            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  81            Default: "nulls_are_small"
  82    """
  83
  84    FUNCTIONS: t.Dict[str, t.Callable] = {
  85        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  86        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  87            this=seq_get(args, 0),
  88            to=exp.DataType(this=exp.DataType.Type.TEXT),
  89        ),
  90        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  91        "IFNULL": exp.Coalesce.from_arg_list,
  92        "LIKE": parse_like,
  93        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  94            this=seq_get(args, 0),
  95            to=exp.DataType(this=exp.DataType.Type.TEXT),
  96        ),
  97        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  98            this=exp.Cast(
  99                this=seq_get(args, 0),
 100                to=exp.DataType(this=exp.DataType.Type.TEXT),
 101            ),
 102            start=exp.Literal.number(1),
 103            length=exp.Literal.number(10),
 104        ),
 105        "VAR_MAP": parse_var_map,
 106    }
 107
 108    NO_PAREN_FUNCTIONS = {
 109        TokenType.CURRENT_DATE: exp.CurrentDate,
 110        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 111        TokenType.CURRENT_TIME: exp.CurrentTime,
 112        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 113        TokenType.CURRENT_USER: exp.CurrentUser,
 114    }
 115
 116    JOIN_HINTS: t.Set[str] = set()
 117
 118    NESTED_TYPE_TOKENS = {
 119        TokenType.ARRAY,
 120        TokenType.MAP,
 121        TokenType.STRUCT,
 122        TokenType.NULLABLE,
 123    }
 124
 125    TYPE_TOKENS = {
 126        TokenType.BIT,
 127        TokenType.BOOLEAN,
 128        TokenType.TINYINT,
 129        TokenType.SMALLINT,
 130        TokenType.INT,
 131        TokenType.BIGINT,
 132        TokenType.FLOAT,
 133        TokenType.DOUBLE,
 134        TokenType.CHAR,
 135        TokenType.NCHAR,
 136        TokenType.VARCHAR,
 137        TokenType.NVARCHAR,
 138        TokenType.TEXT,
 139        TokenType.MEDIUMTEXT,
 140        TokenType.LONGTEXT,
 141        TokenType.MEDIUMBLOB,
 142        TokenType.LONGBLOB,
 143        TokenType.BINARY,
 144        TokenType.VARBINARY,
 145        TokenType.JSON,
 146        TokenType.JSONB,
 147        TokenType.INTERVAL,
 148        TokenType.TIME,
 149        TokenType.TIMESTAMP,
 150        TokenType.TIMESTAMPTZ,
 151        TokenType.TIMESTAMPLTZ,
 152        TokenType.DATETIME,
 153        TokenType.DATE,
 154        TokenType.DECIMAL,
 155        TokenType.BIGDECIMAL,
 156        TokenType.UUID,
 157        TokenType.GEOGRAPHY,
 158        TokenType.GEOMETRY,
 159        TokenType.HLLSKETCH,
 160        TokenType.HSTORE,
 161        TokenType.PSEUDO_TYPE,
 162        TokenType.SUPER,
 163        TokenType.SERIAL,
 164        TokenType.SMALLSERIAL,
 165        TokenType.BIGSERIAL,
 166        TokenType.XML,
 167        TokenType.UNIQUEIDENTIFIER,
 168        TokenType.MONEY,
 169        TokenType.SMALLMONEY,
 170        TokenType.ROWVERSION,
 171        TokenType.IMAGE,
 172        TokenType.VARIANT,
 173        TokenType.OBJECT,
 174        TokenType.INET,
 175        *NESTED_TYPE_TOKENS,
 176    }
 177
 178    SUBQUERY_PREDICATES = {
 179        TokenType.ANY: exp.Any,
 180        TokenType.ALL: exp.All,
 181        TokenType.EXISTS: exp.Exists,
 182        TokenType.SOME: exp.Any,
 183    }
 184
 185    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 186
 187    DB_CREATABLES = {
 188        TokenType.DATABASE,
 189        TokenType.SCHEMA,
 190        TokenType.TABLE,
 191        TokenType.VIEW,
 192    }
 193
 194    CREATABLES = {
 195        TokenType.COLUMN,
 196        TokenType.FUNCTION,
 197        TokenType.INDEX,
 198        TokenType.PROCEDURE,
 199        *DB_CREATABLES,
 200    }
 201
 202    ID_VAR_TOKENS = {
 203        TokenType.VAR,
 204        TokenType.ANTI,
 205        TokenType.APPLY,
 206        TokenType.AUTO_INCREMENT,
 207        TokenType.BEGIN,
 208        TokenType.BOTH,
 209        TokenType.BUCKET,
 210        TokenType.CACHE,
 211        TokenType.CASCADE,
 212        TokenType.COLLATE,
 213        TokenType.COMMAND,
 214        TokenType.COMMENT,
 215        TokenType.COMMIT,
 216        TokenType.COMPOUND,
 217        TokenType.CONSTRAINT,
 218        TokenType.DEFAULT,
 219        TokenType.DELETE,
 220        TokenType.DESCRIBE,
 221        TokenType.DIV,
 222        TokenType.END,
 223        TokenType.EXECUTE,
 224        TokenType.ESCAPE,
 225        TokenType.FALSE,
 226        TokenType.FIRST,
 227        TokenType.FILTER,
 228        TokenType.FOLLOWING,
 229        TokenType.FORMAT,
 230        TokenType.FULL,
 231        TokenType.IF,
 232        TokenType.IS,
 233        TokenType.ISNULL,
 234        TokenType.INTERVAL,
 235        TokenType.LAZY,
 236        TokenType.LEADING,
 237        TokenType.LEFT,
 238        TokenType.LOCAL,
 239        TokenType.MATERIALIZED,
 240        TokenType.MERGE,
 241        TokenType.NATURAL,
 242        TokenType.NEXT,
 243        TokenType.OFFSET,
 244        TokenType.ONLY,
 245        TokenType.OPTIONS,
 246        TokenType.ORDINALITY,
 247        TokenType.OVERWRITE,
 248        TokenType.PARTITION,
 249        TokenType.PERCENT,
 250        TokenType.PIVOT,
 251        TokenType.PRAGMA,
 252        TokenType.PRECEDING,
 253        TokenType.RANGE,
 254        TokenType.REFERENCES,
 255        TokenType.RIGHT,
 256        TokenType.ROW,
 257        TokenType.ROWS,
 258        TokenType.SEED,
 259        TokenType.SEMI,
 260        TokenType.SET,
 261        TokenType.SHOW,
 262        TokenType.SORTKEY,
 263        TokenType.TEMPORARY,
 264        TokenType.TOP,
 265        TokenType.TRAILING,
 266        TokenType.TRUE,
 267        TokenType.UNBOUNDED,
 268        TokenType.UNIQUE,
 269        TokenType.UNLOGGED,
 270        TokenType.UNPIVOT,
 271        TokenType.VOLATILE,
 272        TokenType.WINDOW,
 273        *CREATABLES,
 274        *SUBQUERY_PREDICATES,
 275        *TYPE_TOKENS,
 276        *NO_PAREN_FUNCTIONS,
 277    }
 278
 279    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 280
 281    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 282        TokenType.APPLY,
 283        TokenType.FULL,
 284        TokenType.LEFT,
 285        TokenType.NATURAL,
 286        TokenType.OFFSET,
 287        TokenType.RIGHT,
 288        TokenType.WINDOW,
 289    }
 290
 291    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 292
 293    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 294
 295    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 296
 297    FUNC_TOKENS = {
 298        TokenType.COMMAND,
 299        TokenType.CURRENT_DATE,
 300        TokenType.CURRENT_DATETIME,
 301        TokenType.CURRENT_TIMESTAMP,
 302        TokenType.CURRENT_TIME,
 303        TokenType.CURRENT_USER,
 304        TokenType.FILTER,
 305        TokenType.FIRST,
 306        TokenType.FORMAT,
 307        TokenType.GLOB,
 308        TokenType.IDENTIFIER,
 309        TokenType.INDEX,
 310        TokenType.ISNULL,
 311        TokenType.ILIKE,
 312        TokenType.LIKE,
 313        TokenType.MERGE,
 314        TokenType.OFFSET,
 315        TokenType.PRIMARY_KEY,
 316        TokenType.REPLACE,
 317        TokenType.ROW,
 318        TokenType.UNNEST,
 319        TokenType.VAR,
 320        TokenType.LEFT,
 321        TokenType.RIGHT,
 322        TokenType.DATE,
 323        TokenType.DATETIME,
 324        TokenType.TABLE,
 325        TokenType.TIMESTAMP,
 326        TokenType.TIMESTAMPTZ,
 327        TokenType.WINDOW,
 328        *TYPE_TOKENS,
 329        *SUBQUERY_PREDICATES,
 330    }
 331
 332    CONJUNCTION = {
 333        TokenType.AND: exp.And,
 334        TokenType.OR: exp.Or,
 335    }
 336
 337    EQUALITY = {
 338        TokenType.EQ: exp.EQ,
 339        TokenType.NEQ: exp.NEQ,
 340        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 341    }
 342
 343    COMPARISON = {
 344        TokenType.GT: exp.GT,
 345        TokenType.GTE: exp.GTE,
 346        TokenType.LT: exp.LT,
 347        TokenType.LTE: exp.LTE,
 348    }
 349
 350    BITWISE = {
 351        TokenType.AMP: exp.BitwiseAnd,
 352        TokenType.CARET: exp.BitwiseXor,
 353        TokenType.PIPE: exp.BitwiseOr,
 354        TokenType.DPIPE: exp.DPipe,
 355    }
 356
 357    TERM = {
 358        TokenType.DASH: exp.Sub,
 359        TokenType.PLUS: exp.Add,
 360        TokenType.MOD: exp.Mod,
 361        TokenType.COLLATE: exp.Collate,
 362    }
 363
 364    FACTOR = {
 365        TokenType.DIV: exp.IntDiv,
 366        TokenType.LR_ARROW: exp.Distance,
 367        TokenType.SLASH: exp.Div,
 368        TokenType.STAR: exp.Mul,
 369    }
 370
 371    TIMESTAMPS = {
 372        TokenType.TIME,
 373        TokenType.TIMESTAMP,
 374        TokenType.TIMESTAMPTZ,
 375        TokenType.TIMESTAMPLTZ,
 376    }
 377
 378    SET_OPERATIONS = {
 379        TokenType.UNION,
 380        TokenType.INTERSECT,
 381        TokenType.EXCEPT,
 382    }
 383
 384    JOIN_SIDES = {
 385        TokenType.LEFT,
 386        TokenType.RIGHT,
 387        TokenType.FULL,
 388    }
 389
 390    JOIN_KINDS = {
 391        TokenType.INNER,
 392        TokenType.OUTER,
 393        TokenType.CROSS,
 394        TokenType.SEMI,
 395        TokenType.ANTI,
 396    }
 397
 398    LAMBDAS = {
 399        TokenType.ARROW: lambda self, expressions: self.expression(
 400            exp.Lambda,
 401            this=self._replace_lambda(
 402                self._parse_conjunction(),
 403                {node.name for node in expressions},
 404            ),
 405            expressions=expressions,
 406        ),
 407        TokenType.FARROW: lambda self, expressions: self.expression(
 408            exp.Kwarg,
 409            this=exp.Var(this=expressions[0].name),
 410            expression=self._parse_conjunction(),
 411        ),
 412    }
 413
 414    COLUMN_OPERATORS = {
 415        TokenType.DOT: None,
 416        TokenType.DCOLON: lambda self, this, to: self.expression(
 417            exp.Cast if self.STRICT_CAST else exp.TryCast,
 418            this=this,
 419            to=to,
 420        ),
 421        TokenType.ARROW: lambda self, this, path: self.expression(
 422            exp.JSONExtract,
 423            this=this,
 424            expression=path,
 425        ),
 426        TokenType.DARROW: lambda self, this, path: self.expression(
 427            exp.JSONExtractScalar,
 428            this=this,
 429            expression=path,
 430        ),
 431        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 432            exp.JSONBExtract,
 433            this=this,
 434            expression=path,
 435        ),
 436        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 437            exp.JSONBExtractScalar,
 438            this=this,
 439            expression=path,
 440        ),
 441        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 442            exp.JSONBContains,
 443            this=this,
 444            expression=key,
 445        ),
 446    }
 447
 448    EXPRESSION_PARSERS = {
 449        exp.Column: lambda self: self._parse_column(),
 450        exp.DataType: lambda self: self._parse_types(),
 451        exp.From: lambda self: self._parse_from(),
 452        exp.Group: lambda self: self._parse_group(),
 453        exp.Identifier: lambda self: self._parse_id_var(),
 454        exp.Lateral: lambda self: self._parse_lateral(),
 455        exp.Join: lambda self: self._parse_join(),
 456        exp.Order: lambda self: self._parse_order(),
 457        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 458        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 459        exp.Lambda: lambda self: self._parse_lambda(),
 460        exp.Limit: lambda self: self._parse_limit(),
 461        exp.Offset: lambda self: self._parse_offset(),
 462        exp.TableAlias: lambda self: self._parse_table_alias(),
 463        exp.Table: lambda self: self._parse_table(),
 464        exp.Condition: lambda self: self._parse_conjunction(),
 465        exp.Expression: lambda self: self._parse_statement(),
 466        exp.Properties: lambda self: self._parse_properties(),
 467        exp.Where: lambda self: self._parse_where(),
 468        exp.Ordered: lambda self: self._parse_ordered(),
 469        exp.Having: lambda self: self._parse_having(),
 470        exp.With: lambda self: self._parse_with(),
 471        exp.Window: lambda self: self._parse_named_window(),
 472        exp.Qualify: lambda self: self._parse_qualify(),
 473        exp.Returning: lambda self: self._parse_returning(),
 474        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 475    }
 476
 477    STATEMENT_PARSERS = {
 478        TokenType.ALTER: lambda self: self._parse_alter(),
 479        TokenType.BEGIN: lambda self: self._parse_transaction(),
 480        TokenType.CACHE: lambda self: self._parse_cache(),
 481        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 482        TokenType.COMMENT: lambda self: self._parse_comment(),
 483        TokenType.CREATE: lambda self: self._parse_create(),
 484        TokenType.DELETE: lambda self: self._parse_delete(),
 485        TokenType.DESC: lambda self: self._parse_describe(),
 486        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 487        TokenType.DROP: lambda self: self._parse_drop(),
 488        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 489        TokenType.INSERT: lambda self: self._parse_insert(),
 490        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 491        TokenType.MERGE: lambda self: self._parse_merge(),
 492        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 493        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 494        TokenType.SET: lambda self: self._parse_set(),
 495        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 496        TokenType.UPDATE: lambda self: self._parse_update(),
 497        TokenType.USE: lambda self: self.expression(
 498            exp.Use,
 499            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 500            and exp.Var(this=self._prev.text),
 501            this=self._parse_table(schema=False),
 502        ),
 503    }
 504
 505    UNARY_PARSERS = {
 506        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 507        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 508        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 509        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 510    }
 511
 512    PRIMARY_PARSERS = {
 513        TokenType.STRING: lambda self, token: self.expression(
 514            exp.Literal, this=token.text, is_string=True
 515        ),
 516        TokenType.NUMBER: lambda self, token: self.expression(
 517            exp.Literal, this=token.text, is_string=False
 518        ),
 519        TokenType.STAR: lambda self, _: self.expression(
 520            exp.Star,
 521            **{"except": self._parse_except(), "replace": self._parse_replace()},
 522        ),
 523        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 524        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 525        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 526        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 527        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 528        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 529        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 530        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 531        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 532    }
 533
 534    PLACEHOLDER_PARSERS = {
 535        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 536        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 537        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 538        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 539        else None,
 540    }
 541
 542    RANGE_PARSERS = {
 543        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 544        TokenType.GLOB: binary_range_parser(exp.Glob),
 545        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 546        TokenType.IN: lambda self, this: self._parse_in(this),
 547        TokenType.IS: lambda self, this: self._parse_is(this),
 548        TokenType.LIKE: binary_range_parser(exp.Like),
 549        TokenType.ILIKE: binary_range_parser(exp.ILike),
 550        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 551        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 552        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 553    }
 554
 555    PROPERTY_PARSERS = {
 556        "AFTER": lambda self: self._parse_afterjournal(
 557            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 558        ),
 559        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 560        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 561        "BEFORE": lambda self: self._parse_journal(
 562            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 563        ),
 564        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 565        "CHARACTER SET": lambda self: self._parse_character_set(),
 566        "CHECKSUM": lambda self: self._parse_checksum(),
 567        "CLUSTER BY": lambda self: self.expression(
 568            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 569        ),
 570        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 571        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 572        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 573            default=self._prev.text.upper() == "DEFAULT"
 574        ),
 575        "DEFINER": lambda self: self._parse_definer(),
 576        "DETERMINISTIC": lambda self: self.expression(
 577            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 578        ),
 579        "DISTKEY": lambda self: self._parse_distkey(),
 580        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 581        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 582        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 583        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 584        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 585        "FREESPACE": lambda self: self._parse_freespace(),
 586        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 587        "IMMUTABLE": lambda self: self.expression(
 588            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 589        ),
 590        "JOURNAL": lambda self: self._parse_journal(
 591            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 592        ),
 593        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 594        "LIKE": lambda self: self._parse_create_like(),
 595        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 596        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 597        "LOCK": lambda self: self._parse_locking(),
 598        "LOCKING": lambda self: self._parse_locking(),
 599        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 600        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 601        "MAX": lambda self: self._parse_datablocksize(),
 602        "MAXIMUM": lambda self: self._parse_datablocksize(),
 603        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 604            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 605        ),
 606        "MIN": lambda self: self._parse_datablocksize(),
 607        "MINIMUM": lambda self: self._parse_datablocksize(),
 608        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 609        "NO": lambda self: self._parse_noprimaryindex(),
 610        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 611        "ON": lambda self: self._parse_oncommit(),
 612        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 613        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 614        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 615        "RETURNS": lambda self: self._parse_returns(),
 616        "ROW": lambda self: self._parse_row(),
 617        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 618        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 619        "SORTKEY": lambda self: self._parse_sortkey(),
 620        "STABLE": lambda self: self.expression(
 621            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 622        ),
 623        "STORED": lambda self: self._parse_stored(),
 624        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 625        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 626        "TEMP": lambda self: self._parse_temporary(global_=False),
 627        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 628        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 629        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 630        "VOLATILE": lambda self: self._parse_volatile_property(),
 631        "WITH": lambda self: self._parse_with_property(),
 632    }
 633
 634    CONSTRAINT_PARSERS = {
 635        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 636        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 637        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 638        "CHARACTER SET": lambda self: self.expression(
 639            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 640        ),
 641        "CHECK": lambda self: self.expression(
 642            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 643        ),
 644        "COLLATE": lambda self: self.expression(
 645            exp.CollateColumnConstraint, this=self._parse_var()
 646        ),
 647        "COMMENT": lambda self: self.expression(
 648            exp.CommentColumnConstraint, this=self._parse_string()
 649        ),
 650        "COMPRESS": lambda self: self._parse_compress(),
 651        "DEFAULT": lambda self: self.expression(
 652            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 653        ),
 654        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 655        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 656        "FORMAT": lambda self: self.expression(
 657            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 658        ),
 659        "GENERATED": lambda self: self._parse_generated_as_identity(),
 660        "IDENTITY": lambda self: self._parse_auto_increment(),
 661        "INLINE": lambda self: self._parse_inline(),
 662        "LIKE": lambda self: self._parse_create_like(),
 663        "NOT": lambda self: self._parse_not_constraint(),
 664        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 665        "ON": lambda self: self._match(TokenType.UPDATE)
 666        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 667        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 668        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 669        "REFERENCES": lambda self: self._parse_references(match=False),
 670        "TITLE": lambda self: self.expression(
 671            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 672        ),
 673        "UNIQUE": lambda self: self._parse_unique(),
 674        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 675    }
 676
 677    ALTER_PARSERS = {
 678        "ADD": lambda self: self._parse_alter_table_add(),
 679        "ALTER": lambda self: self._parse_alter_table_alter(),
 680        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 681        "DROP": lambda self: self._parse_alter_table_drop(),
 682        "RENAME": lambda self: self._parse_alter_table_rename(),
 683    }
 684
 685    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 686
 687    NO_PAREN_FUNCTION_PARSERS = {
 688        TokenType.CASE: lambda self: self._parse_case(),
 689        TokenType.IF: lambda self: self._parse_if(),
 690        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 691    }
 692
 693    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 694        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 695        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 696        "DECODE": lambda self: self._parse_decode(),
 697        "EXTRACT": lambda self: self._parse_extract(),
 698        "JSON_OBJECT": lambda self: self._parse_json_object(),
 699        "LOG": lambda self: self._parse_logarithm(),
 700        "MATCH": lambda self: self._parse_match_against(),
 701        "POSITION": lambda self: self._parse_position(),
 702        "STRING_AGG": lambda self: self._parse_string_agg(),
 703        "SUBSTRING": lambda self: self._parse_substring(),
 704        "TRIM": lambda self: self._parse_trim(),
 705        "TRY_CAST": lambda self: self._parse_cast(False),
 706        "TRY_CONVERT": lambda self: self._parse_convert(False),
 707    }
 708
 709    QUERY_MODIFIER_PARSERS = {
 710        "match": lambda self: self._parse_match_recognize(),
 711        "where": lambda self: self._parse_where(),
 712        "group": lambda self: self._parse_group(),
 713        "having": lambda self: self._parse_having(),
 714        "qualify": lambda self: self._parse_qualify(),
 715        "windows": lambda self: self._parse_window_clause(),
 716        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
 717        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 718        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 719        "order": lambda self: self._parse_order(),
 720        "limit": lambda self: self._parse_limit(),
 721        "offset": lambda self: self._parse_offset(),
 722        "lock": lambda self: self._parse_lock(),
 723        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 724    }
 725
    # Handlers for SET <scope> item assignments and SET TRANSACTION.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement handlers; empty here, populated by dialect subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-type parsers for typed literals; empty here, a dialect hook.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted transaction kinds, e.g. BEGIN IMMEDIATE TRANSACTION.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Phrases accepted as SET TRANSACTION characteristics.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted in INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Tokens usable as window aliases: all identifier tokens except ROWS.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that may introduce a constraint in ALTER ... ADD.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags below; defaults here, overridden by subclasses.
    # NOTE(review): exact semantics of each flag are defined where they are
    # consumed elsewhere in the codebase — confirm before relying on them.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None
    PREFIXED_PIVOT_COLUMNS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Restrict instances to exactly these attributes (no per-instance __dict__).
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 785
 786    def __init__(
 787        self,
 788        error_level: t.Optional[ErrorLevel] = None,
 789        error_message_context: int = 100,
 790        index_offset: int = 0,
 791        unnest_column_only: bool = False,
 792        alias_post_tablesample: bool = False,
 793        max_errors: int = 3,
 794        null_ordering: t.Optional[str] = None,
 795    ):
 796        self.error_level = error_level or ErrorLevel.IMMEDIATE
 797        self.error_message_context = error_message_context
 798        self.index_offset = index_offset
 799        self.unnest_column_only = unnest_column_only
 800        self.alias_post_tablesample = alias_post_tablesample
 801        self.max_errors = max_errors
 802        self.null_ordering = null_ordering
 803        self.reset()
 804
 805    def reset(self):
 806        self.sql = ""
 807        self.errors = []
 808        self._tokens = []
 809        self._index = 0
 810        self._curr = None
 811        self._next = None
 812        self._prev = None
 813        self._prev_comments = None
 814
 815    def parse(
 816        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 817    ) -> t.List[t.Optional[exp.Expression]]:
 818        """
 819        Parses a list of tokens and returns a list of syntax trees, one tree
 820        per parsed SQL statement.
 821
 822        Args:
 823            raw_tokens: the list of tokens.
 824            sql: the original SQL string, used to produce helpful debug messages.
 825
 826        Returns:
 827            The list of syntax trees.
 828        """
 829        return self._parse(
 830            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 831        )
 832
 833    def parse_into(
 834        self,
 835        expression_types: exp.IntoType,
 836        raw_tokens: t.List[Token],
 837        sql: t.Optional[str] = None,
 838    ) -> t.List[t.Optional[exp.Expression]]:
 839        """
 840        Parses a list of tokens into a given Expression type. If a collection of Expression
 841        types is given instead, this method will try to parse the token list into each one
 842        of them, stopping at the first for which the parsing succeeds.
 843
 844        Args:
 845            expression_types: the expression type(s) to try and parse the token list into.
 846            raw_tokens: the list of tokens.
 847            sql: the original SQL string, used to produce helpful debug messages.
 848
 849        Returns:
 850            The target Expression.
 851        """
 852        errors = []
 853        for expression_type in ensure_collection(expression_types):
 854            parser = self.EXPRESSION_PARSERS.get(expression_type)
 855            if not parser:
 856                raise TypeError(f"No parser registered for {expression_type}")
 857            try:
 858                return self._parse(parser, raw_tokens, sql)
 859            except ParseError as e:
 860                e.errors[0]["into_expression"] = expression_type
 861                errors.append(e)
 862        raise ParseError(
 863            f"Failed to parse into {expression_types}",
 864            errors=merge_errors(errors),
 865        ) from errors[-1]
 866
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Split the token stream on semicolons and run `parse_method` once per
        statement chunk, collecting one (possibly None) expression per chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # One chunk per statement: semicolons are dropped, and a trailing
        # semicolon does not open an extra empty chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position the cursor on the chunk's first token

            expressions.append(parse_method(self))

            # Leftover tokens mean the parse method did not consume the chunk.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 900
 901    def check_errors(self) -> None:
 902        """
 903        Logs or raises any found errors, depending on the chosen error level setting.
 904        """
 905        if self.error_level == ErrorLevel.WARN:
 906            for error in self.errors:
 907                logger.error(str(error))
 908        elif self.error_level == ErrorLevel.RAISE and self.errors:
 909            raise ParseError(
 910                concat_messages(self.errors, self.max_errors),
 911                errors=merge_errors(self.errors),
 912            )
 913
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        The error is anchored on `token`, falling back to the current, then the
        previous token, then an empty one at end of stream.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        # Surrounding SQL shown in the message, clipped to error_message_context chars.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The offending span is underlined using ANSI escape sequences.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 941
 942    def expression(
 943        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 944    ) -> E:
 945        """
 946        Creates a new, validated Expression.
 947
 948        Args:
 949            exp_class: the expression class to instantiate.
 950            comments: an optional list of comments to attach to the expression.
 951            kwargs: the arguments to set for the expression along with their respective values.
 952
 953        Returns:
 954            The target expression.
 955        """
 956        instance = exp_class(**kwargs)
 957        if self._prev_comments:
 958            instance.comments = self._prev_comments
 959            self._prev_comments = None
 960        if comments:
 961            instance.comments = comments
 962        self.validate_expression(instance)
 963        return instance
 964
 965    def validate_expression(
 966        self, expression: exp.Expression, args: t.Optional[t.List] = None
 967    ) -> None:
 968        """
 969        Validates an already instantiated expression, making sure that all its mandatory arguments
 970        are set.
 971
 972        Args:
 973            expression: the expression to validate.
 974            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 975        """
 976        if self.error_level == ErrorLevel.IGNORE:
 977            return
 978
 979        for error_message in expression.error_messages(args):
 980            self.raise_error(error_message)
 981
 982    def _find_sql(self, start: Token, end: Token) -> str:
 983        return self.sql[start.start : end.end]
 984
 985    def _advance(self, times: int = 1) -> None:
 986        self._index += times
 987        self._curr = seq_get(self._tokens, self._index)
 988        self._next = seq_get(self._tokens, self._index + 1)
 989        if self._index > 0:
 990            self._prev = self._tokens[self._index - 1]
 991            self._prev_comments = self._prev.comments
 992        else:
 993            self._prev = None
 994            self._prev_comments = None
 995
 996    def _retreat(self, index: int) -> None:
 997        if index != self._index:
 998            self._advance(index - self._index)
 999
1000    def _parse_command(self) -> exp.Command:
1001        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1002
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unrecognized target kind: keep the statement verbatim as a Command.
            return self._parse_as_command(start)

        # The target is parsed differently depending on what kind of object it is.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1028
1029    def _parse_statement(self) -> t.Optional[exp.Expression]:
1030        if self._curr is None:
1031            return None
1032
1033        if self._match_set(self.STATEMENT_PARSERS):
1034            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1035
1036        if self._match_set(Tokenizer.COMMANDS):
1037            return self._parse_command()
1038
1039        expression = self._parse_expression()
1040        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1041
1042        self._parse_query_modifiers(expression)
1043        return expression
1044
1045    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1046        start = self._prev
1047        temporary = self._match(TokenType.TEMPORARY)
1048        materialized = self._match(TokenType.MATERIALIZED)
1049        kind = self._match_set(self.CREATABLES) and self._prev.text
1050        if not kind:
1051            return self._parse_as_command(start)
1052
1053        return self.expression(
1054            exp.Drop,
1055            exists=self._parse_exists(),
1056            this=self._parse_table(schema=True),
1057            kind=kind,
1058            temporary=temporary,
1059            materialized=materialized,
1060            cascade=self._match(TokenType.CASCADE),
1061            constraints=self._match_text_seq("CONSTRAINTS"),
1062            purge=self._match_text_seq("PURGE"),
1063        )
1064
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consume an optional IF [NOT] EXISTS clause; falsy when absent.

        Relies on `and` short-circuiting: no further tokens are consumed past
        the first keyword that fails to match, and the chain's falsy value
        (whatever `_match` returns) is propagated as-is.
        """
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1071
1072    def _parse_create(self) -> t.Optional[exp.Expression]:
1073        start = self._prev
1074        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1075            TokenType.OR, TokenType.REPLACE
1076        )
1077        unique = self._match(TokenType.UNIQUE)
1078
1079        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1080            self._match(TokenType.TABLE)
1081
1082        properties = None
1083        create_token = self._match_set(self.CREATABLES) and self._prev
1084
1085        if not create_token:
1086            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1087            create_token = self._match_set(self.CREATABLES) and self._prev
1088
1089            if not properties or not create_token:
1090                return self._parse_as_command(start)
1091
1092        exists = self._parse_exists(not_=True)
1093        this = None
1094        expression = None
1095        indexes = None
1096        no_schema_binding = None
1097        begin = None
1098
1099        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1100            this = self._parse_user_defined_function(kind=create_token.token_type)
1101            temp_properties = self._parse_properties()
1102            if properties and temp_properties:
1103                properties.expressions.extend(temp_properties.expressions)
1104            elif temp_properties:
1105                properties = temp_properties
1106
1107            self._match(TokenType.ALIAS)
1108            begin = self._match(TokenType.BEGIN)
1109            return_ = self._match_text_seq("RETURN")
1110            expression = self._parse_statement()
1111
1112            if return_:
1113                expression = self.expression(exp.Return, this=expression)
1114        elif create_token.token_type == TokenType.INDEX:
1115            this = self._parse_index()
1116        elif create_token.token_type in self.DB_CREATABLES:
1117            table_parts = self._parse_table_parts(schema=True)
1118
1119            # exp.Properties.Location.POST_NAME
1120            if self._match(TokenType.COMMA):
1121                temp_properties = self._parse_properties(before=True)
1122                if properties and temp_properties:
1123                    properties.expressions.extend(temp_properties.expressions)
1124                elif temp_properties:
1125                    properties = temp_properties
1126
1127            this = self._parse_schema(this=table_parts)
1128
1129            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1130            temp_properties = self._parse_properties()
1131            if properties and temp_properties:
1132                properties.expressions.extend(temp_properties.expressions)
1133            elif temp_properties:
1134                properties = temp_properties
1135
1136            self._match(TokenType.ALIAS)
1137
1138            # exp.Properties.Location.POST_ALIAS
1139            if not (
1140                self._match(TokenType.SELECT, advance=False)
1141                or self._match(TokenType.WITH, advance=False)
1142                or self._match(TokenType.L_PAREN, advance=False)
1143            ):
1144                temp_properties = self._parse_properties()
1145                if properties and temp_properties:
1146                    properties.expressions.extend(temp_properties.expressions)
1147                elif temp_properties:
1148                    properties = temp_properties
1149
1150            expression = self._parse_ddl_select()
1151
1152            if create_token.token_type == TokenType.TABLE:
1153                # exp.Properties.Location.POST_EXPRESSION
1154                temp_properties = self._parse_properties()
1155                if properties and temp_properties:
1156                    properties.expressions.extend(temp_properties.expressions)
1157                elif temp_properties:
1158                    properties = temp_properties
1159
1160                indexes = []
1161                while True:
1162                    index = self._parse_create_table_index()
1163
1164                    # exp.Properties.Location.POST_INDEX
1165                    if self._match(TokenType.PARTITION_BY, advance=False):
1166                        temp_properties = self._parse_properties()
1167                        if properties and temp_properties:
1168                            properties.expressions.extend(temp_properties.expressions)
1169                        elif temp_properties:
1170                            properties = temp_properties
1171
1172                    if not index:
1173                        break
1174                    else:
1175                        indexes.append(index)
1176            elif create_token.token_type == TokenType.VIEW:
1177                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1178                    no_schema_binding = True
1179
1180        return self.expression(
1181            exp.Create,
1182            this=this,
1183            kind=create_token.text,
1184            replace=replace,
1185            unique=unique,
1186            expression=expression,
1187            exists=exists,
1188            properties=properties,
1189            indexes=indexes,
1190            no_schema_binding=no_schema_binding,
1191            begin=begin,
1192        )
1193
1194    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1195        self._match(TokenType.COMMA)
1196
1197        # parsers look to _prev for no/dual/default, so need to consume first
1198        self._match_text_seq("NO")
1199        self._match_text_seq("DUAL")
1200        self._match_text_seq("DEFAULT")
1201
1202        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1203            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1204
1205        return None
1206
1207    def _parse_property(self) -> t.Optional[exp.Expression]:
1208        if self._match_texts(self.PROPERTY_PARSERS):
1209            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1210
1211        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1212            return self._parse_character_set(default=True)
1213
1214        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1215            return self._parse_sortkey(compound=True)
1216
1217        if self._match_text_seq("SQL", "SECURITY"):
1218            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1219
1220        assignment = self._match_pair(
1221            TokenType.VAR, TokenType.EQ, advance=False
1222        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1223
1224        if assignment:
1225            key = self._parse_var_or_string()
1226            self._match(TokenType.EQ)
1227            return self.expression(exp.Property, this=key, value=self._parse_column())
1228
1229        return None
1230
1231    def _parse_stored(self) -> exp.Expression:
1232        self._match(TokenType.ALIAS)
1233
1234        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1235        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1236
1237        return self.expression(
1238            exp.FileFormatProperty,
1239            this=self.expression(
1240                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1241            )
1242            if input_format or output_format
1243            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1244        )
1245
1246    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1247        self._match(TokenType.EQ)
1248        self._match(TokenType.ALIAS)
1249        return self.expression(
1250            exp_class,
1251            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1252        )
1253
1254    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1255        properties = []
1256
1257        while True:
1258            if before:
1259                identified_property = self._parse_property_before()
1260            else:
1261                identified_property = self._parse_property()
1262
1263            if not identified_property:
1264                break
1265            for p in ensure_list(identified_property):
1266                properties.append(p)
1267
1268        if properties:
1269            return self.expression(exp.Properties, expressions=properties)
1270
1271        return None
1272
1273    def _parse_fallback(self, no=False) -> exp.Expression:
1274        self._match_text_seq("FALLBACK")
1275        return self.expression(
1276            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1277        )
1278
1279    def _parse_volatile_property(self) -> exp.Expression:
1280        if self._index >= 2:
1281            pre_volatile_token = self._tokens[self._index - 2]
1282        else:
1283            pre_volatile_token = None
1284
1285        if pre_volatile_token and pre_volatile_token.token_type in (
1286            TokenType.CREATE,
1287            TokenType.REPLACE,
1288            TokenType.UNIQUE,
1289        ):
1290            return exp.VolatileProperty()
1291
1292        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1293
1294    def _parse_with_property(
1295        self,
1296    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1297        self._match(TokenType.WITH)
1298        if self._match(TokenType.L_PAREN, advance=False):
1299            return self._parse_wrapped_csv(self._parse_property)
1300
1301        if self._match_text_seq("JOURNAL"):
1302            return self._parse_withjournaltable()
1303
1304        if self._match_text_seq("DATA"):
1305            return self._parse_withdata(no=False)
1306        elif self._match_text_seq("NO", "DATA"):
1307            return self._parse_withdata(no=True)
1308
1309        if not self._next:
1310            return None
1311
1312        return self._parse_withisolatedloading()
1313
1314    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1315    def _parse_definer(self) -> t.Optional[exp.Expression]:
1316        self._match(TokenType.EQ)
1317
1318        user = self._parse_id_var()
1319        self._match(TokenType.PARAMETER)
1320        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1321
1322        if not user or not host:
1323            return None
1324
1325        return exp.DefinerProperty(this=f"{user}@{host}")
1326
1327    def _parse_withjournaltable(self) -> exp.Expression:
1328        self._match(TokenType.TABLE)
1329        self._match(TokenType.EQ)
1330        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1331
1332    def _parse_log(self, no=False) -> exp.Expression:
1333        self._match_text_seq("LOG")
1334        return self.expression(exp.LogProperty, no=no)
1335
1336    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1337        before = self._match_text_seq("BEFORE")
1338        self._match_text_seq("JOURNAL")
1339        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1340
1341    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1342        self._match_text_seq("NOT")
1343        self._match_text_seq("LOCAL")
1344        self._match_text_seq("AFTER", "JOURNAL")
1345        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1346
1347    def _parse_checksum(self) -> exp.Expression:
1348        self._match_text_seq("CHECKSUM")
1349        self._match(TokenType.EQ)
1350
1351        on = None
1352        if self._match(TokenType.ON):
1353            on = True
1354        elif self._match_text_seq("OFF"):
1355            on = False
1356        default = self._match(TokenType.DEFAULT)
1357
1358        return self.expression(
1359            exp.ChecksumProperty,
1360            on=on,
1361            default=default,
1362        )
1363
1364    def _parse_freespace(self) -> exp.Expression:
1365        self._match_text_seq("FREESPACE")
1366        self._match(TokenType.EQ)
1367        return self.expression(
1368            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1369        )
1370
1371    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1372        self._match_text_seq("MERGEBLOCKRATIO")
1373        if self._match(TokenType.EQ):
1374            return self.expression(
1375                exp.MergeBlockRatioProperty,
1376                this=self._parse_number(),
1377                percent=self._match(TokenType.PERCENT),
1378            )
1379        else:
1380            return self.expression(
1381                exp.MergeBlockRatioProperty,
1382                no=no,
1383                default=default,
1384            )
1385
1386    def _parse_datablocksize(self, default=None) -> exp.Expression:
1387        if default:
1388            self._match_text_seq("DATABLOCKSIZE")
1389            return self.expression(exp.DataBlocksizeProperty, default=True)
1390        elif self._match_texts(("MIN", "MINIMUM")):
1391            self._match_text_seq("DATABLOCKSIZE")
1392            return self.expression(exp.DataBlocksizeProperty, min=True)
1393        elif self._match_texts(("MAX", "MAXIMUM")):
1394            self._match_text_seq("DATABLOCKSIZE")
1395            return self.expression(exp.DataBlocksizeProperty, min=False)
1396
1397        self._match_text_seq("DATABLOCKSIZE")
1398        self._match(TokenType.EQ)
1399        size = self._parse_number()
1400        units = None
1401        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1402            units = self._prev.text
1403        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1404
1405    def _parse_blockcompression(self) -> exp.Expression:
1406        self._match_text_seq("BLOCKCOMPRESSION")
1407        self._match(TokenType.EQ)
1408        always = self._match_text_seq("ALWAYS")
1409        manual = self._match_text_seq("MANUAL")
1410        never = self._match_text_seq("NEVER")
1411        default = self._match_text_seq("DEFAULT")
1412        autotemp = None
1413        if self._match_text_seq("AUTOTEMP"):
1414            autotemp = self._parse_schema()
1415
1416        return self.expression(
1417            exp.BlockCompressionProperty,
1418            always=always,
1419            manual=manual,
1420            never=never,
1421            default=default,
1422            autotemp=autotemp,
1423        )
1424
1425    def _parse_withisolatedloading(self) -> exp.Expression:
1426        no = self._match_text_seq("NO")
1427        concurrent = self._match_text_seq("CONCURRENT")
1428        self._match_text_seq("ISOLATED", "LOADING")
1429        for_all = self._match_text_seq("FOR", "ALL")
1430        for_insert = self._match_text_seq("FOR", "INSERT")
1431        for_none = self._match_text_seq("FOR", "NONE")
1432        return self.expression(
1433            exp.IsolatedLoadingProperty,
1434            no=no,
1435            concurrent=concurrent,
1436            for_all=for_all,
1437            for_insert=for_insert,
1438            for_none=for_none,
1439        )
1440
1441    def _parse_locking(self) -> exp.Expression:
1442        if self._match(TokenType.TABLE):
1443            kind = "TABLE"
1444        elif self._match(TokenType.VIEW):
1445            kind = "VIEW"
1446        elif self._match(TokenType.ROW):
1447            kind = "ROW"
1448        elif self._match_text_seq("DATABASE"):
1449            kind = "DATABASE"
1450        else:
1451            kind = None
1452
1453        if kind in ("DATABASE", "TABLE", "VIEW"):
1454            this = self._parse_table_parts()
1455        else:
1456            this = None
1457
1458        if self._match(TokenType.FOR):
1459            for_or_in = "FOR"
1460        elif self._match(TokenType.IN):
1461            for_or_in = "IN"
1462        else:
1463            for_or_in = None
1464
1465        if self._match_text_seq("ACCESS"):
1466            lock_type = "ACCESS"
1467        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1468            lock_type = "EXCLUSIVE"
1469        elif self._match_text_seq("SHARE"):
1470            lock_type = "SHARE"
1471        elif self._match_text_seq("READ"):
1472            lock_type = "READ"
1473        elif self._match_text_seq("WRITE"):
1474            lock_type = "WRITE"
1475        elif self._match_text_seq("CHECKSUM"):
1476            lock_type = "CHECKSUM"
1477        else:
1478            lock_type = None
1479
1480        override = self._match_text_seq("OVERRIDE")
1481
1482        return self.expression(
1483            exp.LockingProperty,
1484            this=this,
1485            kind=kind,
1486            for_or_in=for_or_in,
1487            lock_type=lock_type,
1488            override=override,
1489        )
1490
1491    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1492        if self._match(TokenType.PARTITION_BY):
1493            return self._parse_csv(self._parse_conjunction)
1494        return []
1495
1496    def _parse_partitioned_by(self) -> exp.Expression:
1497        self._match(TokenType.EQ)
1498        return self.expression(
1499            exp.PartitionedByProperty,
1500            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1501        )
1502
1503    def _parse_withdata(self, no=False) -> exp.Expression:
1504        if self._match_text_seq("AND", "STATISTICS"):
1505            statistics = True
1506        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1507            statistics = False
1508        else:
1509            statistics = None
1510
1511        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1512
1513    def _parse_noprimaryindex(self) -> exp.Expression:
1514        self._match_text_seq("PRIMARY", "INDEX")
1515        return exp.NoPrimaryIndexProperty()
1516
1517    def _parse_oncommit(self) -> exp.Expression:
1518        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1519        return exp.OnCommitProperty()
1520
1521    def _parse_distkey(self) -> exp.Expression:
1522        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1523
1524    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1525        table = self._parse_table(schema=True)
1526        options = []
1527        while self._match_texts(("INCLUDING", "EXCLUDING")):
1528            this = self._prev.text.upper()
1529            id_var = self._parse_id_var()
1530
1531            if not id_var:
1532                return None
1533
1534            options.append(
1535                self.expression(
1536                    exp.Property,
1537                    this=this,
1538                    value=exp.Var(this=id_var.this.upper()),
1539                )
1540            )
1541        return self.expression(exp.LikeProperty, this=table, expressions=options)
1542
1543    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1544        return self.expression(
1545            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1546        )
1547
1548    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1549        self._match(TokenType.EQ)
1550        return self.expression(
1551            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1552        )
1553
1554    def _parse_returns(self) -> exp.Expression:
1555        value: t.Optional[exp.Expression]
1556        is_table = self._match(TokenType.TABLE)
1557
1558        if is_table:
1559            if self._match(TokenType.LT):
1560                value = self.expression(
1561                    exp.Schema,
1562                    this="TABLE",
1563                    expressions=self._parse_csv(self._parse_struct_kwargs),
1564                )
1565                if not self._match(TokenType.GT):
1566                    self.raise_error("Expecting >")
1567            else:
1568                value = self._parse_schema(exp.Var(this="TABLE"))
1569        else:
1570            value = self._parse_types()
1571
1572        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1573
    def _parse_temporary(self, global_: bool = False) -> exp.Expression:
        """Build a TEMPORARY property; `global_` is set by the caller for GLOBAL TEMPORARY."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1577
1578    def _parse_describe(self) -> exp.Expression:
1579        kind = self._match_set(self.CREATABLES) and self._prev.text
1580        this = self._parse_table()
1581
1582        return self.expression(exp.Describe, this=this, kind=kind)
1583
    def _parse_insert(self) -> exp.Expression:
        """Parse the body of an INSERT statement into an exp.Insert node."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... [LOCAL] DIRECTORY '<path>' [row format]: the target is a directory.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # INSERT OR <alternative> — allowed keywords come from INSERT_ALTERNATIVES.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            # INTO and TABLE are optional noise words before the table name.
            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters — each value below consumes tokens in turn.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1615
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... after an INSERT, or None when absent."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # ON CONFLICT targets either a named constraint or a conflict key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1649
1650    def _parse_returning(self) -> t.Optional[exp.Expression]:
1651        if not self._match(TokenType.RETURNING):
1652            return None
1653
1654        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1655
1656    def _parse_row(self) -> t.Optional[exp.Expression]:
1657        if not self._match(TokenType.FORMAT):
1658            return None
1659        return self._parse_row_format()
1660
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a row format clause: SERDE '<name>' or DELIMITED with sub-clauses.

        When `match_row` is True the clause must start with ROW FORMAT, otherwise
        None is returned immediately.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional; a matched keyword phrase pulls in a string argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1686
    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ...."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # NOTE: keyword-argument order matters — partition/INPUTFORMAT/SERDE consume tokens in turn.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1704
1705    def _parse_delete(self) -> exp.Expression:
1706        self._match(TokenType.FROM)
1707
1708        return self.expression(
1709            exp.Delete,
1710            this=self._parse_table(),
1711            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1712            where=self._parse_where(),
1713            returning=self._parse_returning(),
1714        )
1715
1716    def _parse_update(self) -> exp.Expression:
1717        return self.expression(
1718            exp.Update,
1719            **{  # type: ignore
1720                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1721                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1722                "from": self._parse_from(),
1723                "where": self._parse_where(),
1724                "returning": self._parse_returning(),
1725            },
1726        )
1727
1728    def _parse_uncache(self) -> exp.Expression:
1729        if not self._match(TokenType.TABLE):
1730            self.raise_error("Expecting TABLE after UNCACHE")
1731
1732        return self.expression(
1733            exp.Uncache,
1734            exists=self._parse_exists(),
1735            this=self._parse_table(schema=True),
1736        )
1737
1738    def _parse_cache(self) -> exp.Expression:
1739        lazy = self._match(TokenType.LAZY)
1740        self._match(TokenType.TABLE)
1741        table = self._parse_table(schema=True)
1742        options = []
1743
1744        if self._match(TokenType.OPTIONS):
1745            self._match_l_paren()
1746            k = self._parse_string()
1747            self._match(TokenType.EQ)
1748            v = self._parse_string()
1749            options = [k, v]
1750            self._match_r_paren()
1751
1752        self._match(TokenType.ALIAS)
1753        return self.expression(
1754            exp.Cache,
1755            this=table,
1756            lazy=lazy,
1757            options=options,
1758            expression=self._parse_select(nested=True),
1759        )
1760
1761    def _parse_partition(self) -> t.Optional[exp.Expression]:
1762        if not self._match(TokenType.PARTITION):
1763            return None
1764
1765        return self.expression(
1766            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1767        )
1768
1769    def _parse_value(self) -> exp.Expression:
1770        if self._match(TokenType.L_PAREN):
1771            expressions = self._parse_csv(self._parse_conjunction)
1772            self._match_r_paren()
1773            return self.expression(exp.Tuple, expressions=expressions)
1774
1775        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1776        # Source: https://prestodb.io/docs/current/sql/values.html
1777        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1778
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a WITH-prefixed statement, a SELECT, a
        parenthesized subquery (when `nested`/`table` allow it), or a bare VALUES.

        Returns None when nothing select-like is found.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # NOTE(review): presumably only reached when raise_error doesn't
                # actually raise (lenient error level) — verify.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            # Optional SELECT AS STRUCT / SELECT AS VALUE modifier.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                # DISTINCT [ON (<row>)]
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1858
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause (a list of CTEs), or return None when absent.

        `skip_with_token` indicates the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # NOTE: when a COMMA matches, the WITH in the condition is never
            # attempted (short-circuit) and the `else` consumes an optional
            # WITH instead. If neither COMMA nor WITH follows, the list is done.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1878
1879    def _parse_cte(self) -> exp.Expression:
1880        alias = self._parse_table_alias()
1881        if not alias or not alias.this:
1882            self.raise_error("Expected CTE to have alias")
1883
1884        self._match(TokenType.ALIAS)
1885
1886        return self.expression(
1887            exp.CTE,
1888            this=self._parse_wrapped(self._parse_statement),
1889            alias=alias,
1890        )
1891
1892    def _parse_table_alias(
1893        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1894    ) -> t.Optional[exp.Expression]:
1895        any_token = self._match(TokenType.ALIAS)
1896        alias = (
1897            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1898            or self._parse_string_as_identifier()
1899        )
1900
1901        index = self._index
1902        if self._match(TokenType.L_PAREN):
1903            columns = self._parse_csv(self._parse_function_parameter)
1904            self._match_r_paren() if columns else self._retreat(index)
1905        else:
1906            columns = None
1907
1908        if not alias and not columns:
1909            return None
1910
1911        return self.expression(exp.TableAlias, this=alias, columns=columns)
1912
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in an exp.Subquery, parsing trailing pivots and (optionally) an alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
1922
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing clauses (joins, laterals, WHERE/GROUP/... modifiers) to
        `this` in place. No-op for nodes that aren't in MODIFIABLES.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        # Greedily consume any mix of joins, laterals and comma-separated tables.
        while True:
            join = self._parse_join()
            if join:
                this.append("joins", join)

            lateral = None
            if not join:
                lateral = self._parse_lateral()
                if lateral:
                    this.append("laterals", lateral)

            # Comma joins extend the FROM list, but not when `this` is a bare table.
            comma = None if table else self._match(TokenType.COMMA)
            if comma:
                this.args["from"].append("expressions", self._parse_table())

            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1952
1953    def _parse_hint(self) -> t.Optional[exp.Expression]:
1954        if self._match(TokenType.HINT):
1955            hints = self._parse_csv(self._parse_function)
1956            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1957                self.raise_error("Expected */ after HINT")
1958            return self.expression(exp.Hint, expressions=hints)
1959
1960        return None
1961
1962    def _parse_into(self) -> t.Optional[exp.Expression]:
1963        if not self._match(TokenType.INTO):
1964            return None
1965
1966        temp = self._match(TokenType.TEMPORARY)
1967        unlogged = self._match(TokenType.UNLOGGED)
1968        self._match(TokenType.TABLE)
1969
1970        return self.expression(
1971            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1972        )
1973
    def _parse_from(self) -> t.Optional[exp.Expression]:
        """Parse a FROM clause with a comma-separated table list, or None when absent."""
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )
1981
1982    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1983        if not self._match(TokenType.MATCH_RECOGNIZE):
1984            return None
1985
1986        self._match_l_paren()
1987
1988        partition = self._parse_partition_by()
1989        order = self._parse_order()
1990        measures = (
1991            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
1992        )
1993
1994        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1995            rows = exp.Var(this="ONE ROW PER MATCH")
1996        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1997            text = "ALL ROWS PER MATCH"
1998            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1999                text += f" SHOW EMPTY MATCHES"
2000            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2001                text += f" OMIT EMPTY MATCHES"
2002            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2003                text += f" WITH UNMATCHED ROWS"
2004            rows = exp.Var(this=text)
2005        else:
2006            rows = None
2007
2008        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2009            text = "AFTER MATCH SKIP"
2010            if self._match_text_seq("PAST", "LAST", "ROW"):
2011                text += f" PAST LAST ROW"
2012            elif self._match_text_seq("TO", "NEXT", "ROW"):
2013                text += f" TO NEXT ROW"
2014            elif self._match_text_seq("TO", "FIRST"):
2015                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2016            elif self._match_text_seq("TO", "LAST"):
2017                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2018            after = exp.Var(this=text)
2019        else:
2020            after = None
2021
2022        if self._match_text_seq("PATTERN"):
2023            self._match_l_paren()
2024
2025            if not self._curr:
2026                self.raise_error("Expecting )", self._curr)
2027
2028            paren = 1
2029            start = self._curr
2030
2031            while self._curr and paren > 0:
2032                if self._curr.token_type == TokenType.L_PAREN:
2033                    paren += 1
2034                if self._curr.token_type == TokenType.R_PAREN:
2035                    paren -= 1
2036                end = self._prev
2037                self._advance()
2038            if paren > 0:
2039                self.raise_error("Expecting )", self._curr)
2040            pattern = exp.Var(this=self._find_sql(start, end))
2041        else:
2042            pattern = None
2043
2044        define = (
2045            self._parse_csv(
2046                lambda: self.expression(
2047                    exp.Alias,
2048                    alias=self._parse_id_var(any_token=True),
2049                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2050                )
2051            )
2052            if self._match_text_seq("DEFINE")
2053            else None
2054        )
2055
2056        self._match_r_paren()
2057
2058        return self.expression(
2059            exp.MatchRecognize,
2060            partition_by=partition,
2061            order=order,
2062            measures=measures,
2063            rows=rows,
2064            after=after,
2065            pattern=pattern,
2066            define=define,
2067            alias=self._parse_table_alias(),
2068        )
2069
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL ..., OUTER APPLY or CROSS APPLY, or return None when absent."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is treated as the outer variant; CROSS APPLY is not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: <table> [AS col, ...] — the alias carries table + columns.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2111
    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional NATURAL, join-side and join-kind tokens, in that order.

        Returns a (natural, side, kind) triple of matched tokens; unmatched slots are falsy.
        """
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
2120
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (including APPLY forms), or return None when absent.

        `skip_join_token` allows the JOIN keyword itself to be omitted.
        """
        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join: rewind the cursor and drop any modifiers we consumed.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is recorded with a LEFT side token.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # A join condition is either ON <conjunction> or USING (<ids>).
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2161
2162    def _parse_index(self) -> exp.Expression:
2163        index = self._parse_id_var()
2164        self._match(TokenType.ON)
2165        self._match(TokenType.TABLE)  # hive
2166
2167        return self.expression(
2168            exp.Index,
2169            this=index,
2170            table=self.expression(exp.Table, this=self._parse_id_var()),
2171            columns=self._parse_expression(),
2172        )
2173
2174    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2175        unique = self._match(TokenType.UNIQUE)
2176        primary = self._match_text_seq("PRIMARY")
2177        amp = self._match_text_seq("AMP")
2178        if not self._match(TokenType.INDEX):
2179            return None
2180        index = self._parse_id_var()
2181        columns = None
2182        if self._match(TokenType.L_PAREN, advance=False):
2183            columns = self._parse_wrapped_csv(self._parse_column)
2184        return self.expression(
2185            exp.Index,
2186            this=index,
2187            columns=columns,
2188            unique=unique,
2189            primary=primary,
2190            amp=amp,
2191        )
2192
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly qualified table name ([catalog.][db.]table) into exp.Table.

        When `schema` is True, function-call syntax is not considered for the name.
        """
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift previously-parsed parts left: table -> db -> catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2218
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, UNNEST, VALUES, subquery or a plain table.

        `alias_tokens` overrides the token set allowed as alias names; when `schema`
        is True the table is parsed as a schema (column definitions, no alias).
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # `alias_post_tablesample` decides whether the sample clause is parsed
        # before or after the alias; exactly one of the two branches runs.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints: WITH (<hint>, ...)
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this
2275
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] <name>]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # For dialects where the UNNEST alias names the column rather than
            # the table, move the single alias into the column slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            # WITH OFFSET [AS <name>]; defaults to the identifier "offset".
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2302
2303    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2304        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2305        if not is_derived and not self._match(TokenType.VALUES):
2306            return None
2307
2308        expressions = self._parse_csv(self._parse_value)
2309
2310        if is_derived:
2311            self._match_r_paren()
2312
2313        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2314
2315    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2316        if not self._match(TokenType.TABLE_SAMPLE) and not (
2317            as_modifier and self._match_text_seq("USING", "SAMPLE")
2318        ):
2319            return None
2320
2321        bucket_numerator = None
2322        bucket_denominator = None
2323        bucket_field = None
2324        percent = None
2325        rows = None
2326        size = None
2327        seed = None
2328
2329        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2330        method = self._parse_var(tokens=(TokenType.ROW,))
2331
2332        self._match(TokenType.L_PAREN)
2333
2334        num = self._parse_number()
2335
2336        if self._match(TokenType.BUCKET):
2337            bucket_numerator = self._parse_number()
2338            self._match(TokenType.OUT_OF)
2339            bucket_denominator = bucket_denominator = self._parse_number()
2340            self._match(TokenType.ON)
2341            bucket_field = self._parse_field()
2342        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2343            percent = num
2344        elif self._match(TokenType.ROWS):
2345            rows = num
2346        else:
2347            size = num
2348
2349        self._match(TokenType.R_PAREN)
2350
2351        if self._match(TokenType.L_PAREN):
2352            method = self._parse_var()
2353            seed = self._match(TokenType.COMMA) and self._parse_number()
2354            self._match_r_paren()
2355        elif self._match_texts(("SEED", "REPEATABLE")):
2356            seed = self._parse_wrapped(self._parse_number)
2357
2358        return self.expression(
2359            exp.TableSample,
2360            method=method,
2361            bucket_numerator=bucket_numerator,
2362            bucket_denominator=bucket_denominator,
2363            bucket_field=bucket_field,
2364            percent=percent,
2365            rows=rows,
2366            size=size,
2367            seed=seed,
2368            kind=kind,
2369        )
2370
2371    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2372        return list(iter(self._parse_pivot, None))
2373
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause, or return None when absent."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # A PIVOT/UNPIVOT keyword without parens isn't this clause; rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute output column names by combining aggregation aliases with
            # the pivoted IN-list values; PREFIXED_PIVOT_COLUMNS picks the order.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for col in pivot.args["field"].expressions:
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name
                    else:
                        name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name

                    columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS))

            pivot.set("columns", columns)

        return pivot
2432
2433    def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]:
2434        return [agg.alias for agg in pivot_columns]
2435
2436    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2437        if not skip_where_token and not self._match(TokenType.WHERE):
2438            return None
2439
2440        return self.expression(
2441            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2442        )
2443
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP and CUBE."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # GROUP BY terms can interleave (expressions, then ROLLUP, then more
        # expressions, ...), so keep looping until an iteration matches nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # "WITH ROLLUP" / "WITH CUBE" use the bare keyword form (stored as
            # True); otherwise ROLLUP (...) / CUBE (...) carry a column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2475
2476    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2477        if not self._match(TokenType.GROUPING_SETS):
2478            return None
2479
2480        return self._parse_wrapped_csv(self._parse_grouping_set)
2481
2482    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2483        if self._match(TokenType.L_PAREN):
2484            grouping_set = self._parse_csv(self._parse_column)
2485            self._match_r_paren()
2486            return self.expression(exp.Tuple, expressions=grouping_set)
2487
2488        return self._parse_column()
2489
2490    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2491        if not skip_having_token and not self._match(TokenType.HAVING):
2492            return None
2493        return self.expression(exp.Having, this=self._parse_conjunction())
2494
2495    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2496        if not self._match(TokenType.QUALIFY):
2497            return None
2498        return self.expression(exp.Qualify, this=self._parse_conjunction())
2499
2500    def _parse_order(
2501        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2502    ) -> t.Optional[exp.Expression]:
2503        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2504            return this
2505
2506        return self.expression(
2507            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2508        )
2509
2510    def _parse_sort(
2511        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2512    ) -> t.Optional[exp.Expression]:
2513        if not self._match(token_type):
2514            return None
2515        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2516
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|NULLS LAST].

        When no NULLS ordering is explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting.
        """
        this = self._parse_conjunction()
        # ASC is the default, so it only needs to be consumed, not recorded.
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer NULLS FIRST when the dialect's implicit null ordering would
        # place NULLs first for this direction (unless nulls are always last).
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2538
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT/TOP or FETCH clause; return `this` unchanged when absent.

        When `top` is True, the TOP keyword is expected instead of LIMIT.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            # TOP accepts only a number, LIMIT accepts a full term expression.
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            # ROW / ROWS keyword is consumed if present.
            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2577
2578    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2579        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2580            return this
2581
2582        count = self._parse_number()
2583        self._match_set((TokenType.ROW, TokenType.ROWS))
2584        return self.expression(exp.Offset, this=this, expression=count)
2585
2586    def _parse_lock(self) -> t.Optional[exp.Expression]:
2587        if self._match_text_seq("FOR", "UPDATE"):
2588            return self.expression(exp.Lock, update=True)
2589        if self._match_text_seq("FOR", "SHARE"):
2590            return self.expression(exp.Lock, update=False)
2591
2592        return None
2593
2594    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2595        if not self._match_set(self.SET_OPERATIONS):
2596            return this
2597
2598        token_type = self._prev.token_type
2599
2600        if token_type == TokenType.UNION:
2601            expression = exp.Union
2602        elif token_type == TokenType.EXCEPT:
2603            expression = exp.Except
2604        else:
2605            expression = exp.Intersect
2606
2607        return self.expression(
2608            expression,
2609            this=this,
2610            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2611            expression=self._parse_set_operations(self._parse_select(nested=True)),
2612        )
2613
2614    def _parse_expression(self) -> t.Optional[exp.Expression]:
2615        return self._parse_alias(self._parse_conjunction())
2616
2617    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2618        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2619
2620    def _parse_equality(self) -> t.Optional[exp.Expression]:
2621        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2622
2623    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2624        return self._parse_tokens(self._parse_range, self.COMPARISON)
2625
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (entries in RANGE_PARSERS, ISNULL/NOTNULL, IS)."""
        this = self._parse_bitwise()
        # A NOT here negates the whole predicate that follows (e.g. NOT LIKE).
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dispatched parser declined; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2652
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (the IS token was already consumed).

        Handles IS [NOT] DISTINCT FROM and IS [NOT] <null-or-boolean literal>.
        Returns None and rewinds (including the IS token) when neither form
        matches.
        """
        # -1 so that a retreat also puts the consumed IS token back.
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2667
2668    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2669        unnest = self._parse_unnest()
2670        if unnest:
2671            this = self.expression(exp.In, this=this, unnest=unnest)
2672        elif self._match(TokenType.L_PAREN):
2673            expressions = self._parse_csv(self._parse_select_or_expression)
2674
2675            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2676                this = self.expression(exp.In, this=this, query=expressions[0])
2677            else:
2678                this = self.expression(exp.In, this=this, expressions=expressions)
2679
2680            self._match_r_paren()
2681        else:
2682            this = self.expression(exp.In, this=this, field=self._parse_field())
2683
2684        return this
2685
2686    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2687        low = self._parse_bitwise()
2688        self._match(TokenType.AND)
2689        high = self._parse_bitwise()
2690        return self.expression(exp.Between, this=this, low=low, high=high)
2691
2692    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2693        if not self._match(TokenType.ESCAPE):
2694            return this
2695        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2696
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression into exp.Interval(this, unit)."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                # Canonicalize numeric literals to strings: INTERVAL 5 day -> '5' day.
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split '5 day' into this='5' and unit=day.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2717
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise operators and shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            # Shifts arrive as two consecutive LT/GT tokens, so they are
            # matched as pairs rather than through the BITWISE table.
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2740
2741    def _parse_term(self) -> t.Optional[exp.Expression]:
2742        return self._parse_tokens(self._parse_factor, self.TERM)
2743
2744    def _parse_factor(self) -> t.Optional[exp.Expression]:
2745        return self._parse_tokens(self._parse_unary, self.FACTOR)
2746
2747    def _parse_unary(self) -> t.Optional[exp.Expression]:
2748        if self._match_set(self.UNARY_PARSERS):
2749            return self.UNARY_PARSERS[self._prev.token_type](self)
2750        return self._parse_at_time_zone(self._parse_type())
2751
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `TYPE 'literal'` cast form, a bare type, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # A literal after a type (e.g. DATE '2020-01-01') is either a
                # dialect-specific typed literal or a generic cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.args.get("expressions"):
                # A bare type name with no argument list followed by something
                # other than a literal — rewind and reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2773
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into exp.DataType (or exp.PseudoType / exp.Interval).

        When `check_func` is True, a parenthesized type NOT followed by a
        string literal is rejected (rewound) as a likely function call — see
        the `maybe_func` handling below.
        """
        index = self._index

        # Optional SYSUDTLIB. prefix (seen in Teradata-style UDT type names)
        # is recorded as a flag on the resulting DataType.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # TYPE(...) might actually be a function call with a type-like name.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Bracketed array types, e.g. INT[] or INT[][], nest outward.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this wasn't a type after all — rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values right after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the TIME/TIMESTAMP WITH [LOCAL] TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit zone variant can no longer be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal, so treat the parenthesized
                # form as a function call rather than a type — rewind fully.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2887
2888    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2889        index = self._index
2890        this = self._parse_id_var()
2891        self._match(TokenType.COLON)
2892        data_type = self._parse_types()
2893
2894        if not data_type:
2895            self._retreat(index)
2896            return self._parse_types()
2897        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2898
2899    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2900        if not self._match(TokenType.AT_TIME_ZONE):
2901            return this
2902        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2903
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a possibly qualified column reference, including column
        operators such as dots, `::` casts, and bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # expr::type — the operand is a type; `op` builds the node below.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other registered operators take the next token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Each additional dot shifts the qualifiers over:
                # name -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2952
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a registered literal form, a `.N`
        number, or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single Concat,
                # e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers like `.5` tokenize as DOT NUMBER; rebuild as 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            self._match_r_paren()
            # Carry over comments attached to both parens onto the node.
            comments.extend(self._prev_comments)

            if this and comments:
                this.comments = comments

            return this

        return None
3001
3002    def _parse_field(
3003        self,
3004        any_token: bool = False,
3005        tokens: t.Optional[t.Collection[TokenType]] = None,
3006    ) -> t.Optional[exp.Expression]:
3007        return (
3008            self._parse_primary()
3009            or self._parse_function()
3010            or self._parse_id_var(any_token=any_token, tokens=tokens)
3011        )
3012
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to the registered parser tables.

        `functions` optionally overrides the name -> builder mapping
        (defaults to self.FUNCTIONS). Returns None when the current tokens
        cannot start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Paren-less built-ins (e.g. entries in NO_PAREN_FUNCTIONS).
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...).
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved as exp.Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3074
3075    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3076        return self._parse_column_def(self._parse_id_var())
3077
3078    def _parse_user_defined_function(
3079        self, kind: t.Optional[TokenType] = None
3080    ) -> t.Optional[exp.Expression]:
3081        this = self._parse_id_var()
3082
3083        while self._match(TokenType.DOT):
3084            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3085
3086        if not self._match(TokenType.L_PAREN):
3087            return this
3088
3089        expressions = self._parse_csv(self._parse_function_parameter)
3090        self._match_r_paren()
3091        return self.expression(
3092            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3093        )
3094
3095    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3096        literal = self._parse_primary()
3097        if literal:
3098            return self.expression(exp.Introducer, this=token.text, expression=literal)
3099
3100        return self.expression(exp.Identifier, this=token.text)
3101
3102    def _parse_national(self, token: Token) -> exp.Expression:
3103        return self.expression(exp.National, this=exp.Literal.string(token.text))
3104
3105    def _parse_session_parameter(self) -> exp.Expression:
3106        kind = None
3107        this = self._parse_id_var() or self._parse_primary()
3108
3109        if this and self._match(TokenType.DOT):
3110            kind = this.name
3111            this = self._parse_var() or self._parse_primary()
3112
3113        return self.expression(exp.SessionParameter, this=this, kind=kind)
3114
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument (e.g. `x -> ...` or `(x, y) -> ...`),
        falling back to a DISTINCT list or a regular expression/subquery when
        no lambda operator follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse an ordinary function argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Presumably `name = value` arguments are keyword-style, so
                    # the left side is rewritten from a Column to a Var —
                    # TODO(review): confirm against callers.
                    left.replace(exp.Var(this=left.text("this")))

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
3151
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) onto `this`.

        If the parenthesized content parses as a SELECT, it is not a schema
        and `this` is returned unchanged; the token position is always
        restored after the speculative SELECT attempt.
        """
        index = self._index

        try:
            # Speculative parse: a parse error here just means "not a SELECT".
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3172
3173    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3174        kind = self._parse_types()
3175
3176        if self._match_text_seq("FOR", "ORDINALITY"):
3177            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3178
3179        constraints = []
3180        while True:
3181            constraint = self._parse_column_constraint()
3182            if not constraint:
3183                break
3184            constraints.append(constraint)
3185
3186        if not kind and not constraints:
3187            return this
3188
3189        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3190
3191    def _parse_auto_increment(self) -> exp.Expression:
3192        start = None
3193        increment = None
3194
3195        if self._match(TokenType.L_PAREN, advance=False):
3196            args = self._parse_wrapped_csv(self._parse_bitwise)
3197            start = seq_get(args, 0)
3198            increment = seq_get(args, 1)
3199        elif self._match_text_seq("START"):
3200            start = self._parse_bitwise()
3201            self._match_text_seq("INCREMENT")
3202            increment = self._parse_bitwise()
3203
3204        if start and increment:
3205            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3206
3207        return exp.AutoIncrementColumnConstraint()
3208
3209    def _parse_compress(self) -> exp.Expression:
3210        if self._match(TokenType.L_PAREN, advance=False):
3211            return self.expression(
3212                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3213            )
3214
3215        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3216
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)].

        Recognized sequence options -- START WITH, INCREMENT BY, MINVALUE,
        MAXVALUE and [NO] CYCLE -- are stored on the constraint node.
        """
        if self._match(TokenType.BY_DEFAULT):
            # `this=False` encodes BY DEFAULT (vs. True for ALWAYS).
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Options must appear in this order; each is optional.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3243
3244    def _parse_inline(self) -> t.Optional[exp.Expression]:
3245        self._match_text_seq("LENGTH")
3246        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3247
3248    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3249        if self._match_text_seq("NULL"):
3250            return self.expression(exp.NotNullColumnConstraint)
3251        if self._match_text_seq("CASESPECIFIC"):
3252            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3253        return None
3254
3255    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3256        if self._match(TokenType.CONSTRAINT):
3257            this = self._parse_id_var()
3258        else:
3259            this = None
3260
3261        if self._match_texts(self.CONSTRAINT_PARSERS):
3262            return self.expression(
3263                exp.ColumnConstraint,
3264                this=this,
3265                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3266            )
3267
3268        return this
3269
3270    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3271        if not self._match(TokenType.CONSTRAINT):
3272            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3273
3274        this = self._parse_id_var()
3275        expressions = []
3276
3277        while True:
3278            constraint = self._parse_unnamed_constraint() or self._parse_function()
3279            if not constraint:
3280                break
3281            expressions.append(constraint)
3282
3283        return self.expression(exp.Constraint, this=this, expressions=expressions)
3284
3285    def _parse_unnamed_constraint(
3286        self, constraints: t.Optional[t.Collection[str]] = None
3287    ) -> t.Optional[exp.Expression]:
3288        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3289            return None
3290
3291        constraint = self._prev.text.upper()
3292        if constraint not in self.CONSTRAINT_PARSERS:
3293            self.raise_error(f"No parser found for schema constraint {constraint}.")
3294
3295        return self.CONSTRAINT_PARSERS[constraint](self)
3296
3297    def _parse_unique(self) -> exp.Expression:
3298        if not self._match(TokenType.L_PAREN, advance=False):
3299            return self.expression(exp.UniqueColumnConstraint)
3300        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3301
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into a list of option strings.

        Consumes options greedily until none match: ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL.
        """
        options = []
        while True:
            if not self._curr:
                # Ran out of tokens.
                break

            if self._match(TokenType.ON):
                action = None
                # The event word (e.g. DELETE / UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Not an option keyword -- stop without consuming.
                break

        return options
3338
3339    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3340        if match and not self._match(TokenType.REFERENCES):
3341            return None
3342
3343        expressions = None
3344        this = self._parse_id_var()
3345
3346        if self._match(TokenType.L_PAREN, advance=False):
3347            expressions = self._parse_wrapped_id_vars()
3348
3349        options = self._parse_key_constraint_options()
3350        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3351
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint body.

        Reads the referencing column list, an optional REFERENCES clause, and
        any number of ON DELETE / ON UPDATE actions, which are attached to the
        exp.ForeignKey node as `delete` / `update` args.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Becomes the arg name on the node: "delete" or "update".
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3377
3378    def _parse_primary_key(self) -> exp.Expression:
3379        desc = (
3380            self._match_set((TokenType.ASC, TokenType.DESC))
3381            and self._prev.token_type == TokenType.DESC
3382        )
3383
3384        if not self._match(TokenType.L_PAREN, advance=False):
3385            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3386
3387        expressions = self._parse_wrapped_id_vars()
3388        options = self._parse_key_constraint_options()
3389        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3390
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix on `this`: subscript, slice, array or struct literal.

        Recurses to support chained subscripts such as `a[1][2]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        # Which opener we saw determines the node type and the expected closer.
        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. `a[:2]`.
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # A bare bracket (or ARRAY[...]) is an array literal, not a subscript.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices for the dialect's index base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Allow chained subscripts, e.g. `a[1][2]`.
        return self._parse_bracket(this)
3419
3420    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3421        if self._match(TokenType.COLON):
3422            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3423        return this
3424
3425    def _parse_case(self) -> t.Optional[exp.Expression]:
3426        ifs = []
3427        default = None
3428
3429        expression = self._parse_conjunction()
3430
3431        while self._match(TokenType.WHEN):
3432            this = self._parse_conjunction()
3433            self._match(TokenType.THEN)
3434            then = self._parse_conjunction()
3435            ifs.append(self.expression(exp.If, this=this, true=then))
3436
3437        if self._match(TokenType.ELSE):
3438            default = self._parse_conjunction()
3439
3440        if not self._match(TokenType.END):
3441            self.raise_error("Expected END after CASE", self._prev)
3442
3443        return self._parse_window(
3444            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3445        )
3446
3447    def _parse_if(self) -> t.Optional[exp.Expression]:
3448        if self._match(TokenType.L_PAREN):
3449            args = self._parse_csv(self._parse_conjunction)
3450            this = exp.If.from_arg_list(args)
3451            self.validate_expression(this, args)
3452            self._match_r_paren()
3453        else:
3454            condition = self._parse_conjunction()
3455            self._match(TokenType.THEN)
3456            true = self._parse_conjunction()
3457            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3458            self._match(TokenType.END)
3459            this = self.expression(exp.If, this=condition, true=true, false=false)
3460
3461        return self._parse_window(this)
3462
3463    def _parse_extract(self) -> exp.Expression:
3464        this = self._parse_function() or self._parse_var() or self._parse_type()
3465
3466        if self._match(TokenType.FROM):
3467            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3468
3469        if not self._match(TokenType.COMMA):
3470            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3471
3472        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3473
3474    def _parse_cast(self, strict: bool) -> exp.Expression:
3475        this = self._parse_conjunction()
3476
3477        if not self._match(TokenType.ALIAS):
3478            self.raise_error("Expected AS after CAST")
3479
3480        to = self._parse_types()
3481
3482        if not to:
3483            self.raise_error("Expected TYPE after CAST")
3484        elif to.this == exp.DataType.Type.CHAR:
3485            if self._match(TokenType.CHARACTER_SET):
3486                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3487
3488        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3489
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into an exp.GroupConcat.

        Handles the Postgres inline ORDER BY form, the plain argument-list form,
        and the WITHIN GROUP (ORDER BY ...) form, normalizing all of them.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Saved so we can rewind if the WITHIN GROUP check below fails.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3518
3519    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3520        to: t.Optional[exp.Expression]
3521        this = self._parse_bitwise()
3522
3523        if self._match(TokenType.USING):
3524            to = self.expression(exp.CharacterSet, this=self._parse_var())
3525        elif self._match(TokenType.COMMA):
3526            to = self._parse_bitwise()
3527        else:
3528            to = None
3529
3530        # Swap the argument order if needed to produce the correct AST
3531        if self.CONVERT_TYPE_FIRST:
3532            this, to = to, this
3533
3534        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3535
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be matched with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both operands NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                    ),
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3580
3581    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3582        self._match_text_seq("KEY")
3583        key = self._parse_field()
3584        self._match(TokenType.COLON)
3585        self._match_text_seq("VALUE")
3586        value = self._parse_field()
3587        if not key and not value:
3588            return None
3589        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3590
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key-value pairs plus optional modifiers.

        Modifiers are matched in SQL-standard order: NULL handling, UNIQUE KEYS,
        RETURNING <type>, FORMAT JSON, ENCODING <var>.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # The trailing KEYS keyword of WITH/WITHOUT UNIQUE KEYS is optional.
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3621
3622    def _parse_logarithm(self) -> exp.Expression:
3623        # Default argument order is base, expression
3624        args = self._parse_csv(self._parse_range)
3625
3626        if len(args) > 1:
3627            if not self.LOG_BASE_FIRST:
3628                args.reverse()
3629            return exp.Log.from_arg_list(args)
3630
3631        return self.expression(
3632            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3633        )
3634
    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH(col, ...) AGAINST (expr [modifier]).

        The modifier branches must be tried in this order, since
        "IN NATURAL LANGUAGE MODE" may itself be extended by
        "WITH QUERY EXPANSION".
        """
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3656
3657    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3658        args = self._parse_csv(self._parse_bitwise)
3659
3660        if self._match(TokenType.IN):
3661            return self.expression(
3662                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3663            )
3664
3665        if haystack_first:
3666            haystack = seq_get(args, 0)
3667            needle = seq_get(args, 1)
3668        else:
3669            needle = seq_get(args, 0)
3670            haystack = seq_get(args, 1)
3671
3672        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3673
3674        self.validate_expression(this, args)
3675
3676        return this
3677
3678    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3679        args = self._parse_csv(self._parse_table)
3680        return exp.JoinHint(this=func_name.upper(), expressions=args)
3681
3682    def _parse_substring(self) -> exp.Expression:
3683        # Postgres supports the form: substring(string [from int] [for int])
3684        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3685
3686        args = self._parse_csv(self._parse_bitwise)
3687
3688        if self._match(TokenType.FROM):
3689            args.append(self._parse_bitwise())
3690            if self._match(TokenType.FOR):
3691                args.append(self._parse_bitwise())
3692
3693        this = exp.Substring.from_arg_list(args)
3694        self.validate_expression(this, args)
3695
3696        return this
3697
3698    def _parse_trim(self) -> exp.Expression:
3699        # https://www.w3resource.com/sql/character-functions/trim.php
3700        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3701
3702        position = None
3703        collation = None
3704
3705        if self._match_set(self.TRIM_TYPES):
3706            position = self._prev.text.upper()
3707
3708        expression = self._parse_term()
3709        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3710            this = self._parse_term()
3711        else:
3712            this = expression
3713            expression = None
3714
3715        if self._match(TokenType.COLLATE):
3716            collation = self._parse_term()
3717
3718        return self.expression(
3719            exp.Trim,
3720            this=this,
3721            position=position,
3722            expression=expression,
3723            collation=collation,
3724        )
3725
3726    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3727        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3728
3729    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3730        return self._parse_window(self._parse_id_var(), alias=True)
3731
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and the OVER (...) specification.

        With `alias=True`, parses a named window definition (`name AS (...)`)
        instead of requiring OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] start [AND end].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3805
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into a {"value", "side"} dict.

        "value" is UNBOUNDED / CURRENT ROW text or an offset expression;
        "side" is PRECEDING / FOLLOWING text when present. Both rely on
        short-circuit `and`/`or` to consume tokens only on a match.
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }
3816
3817    def _parse_alias(
3818        self, this: t.Optional[exp.Expression], explicit: bool = False
3819    ) -> t.Optional[exp.Expression]:
3820        any_token = self._match(TokenType.ALIAS)
3821
3822        if explicit and not any_token:
3823            return this
3824
3825        if self._match(TokenType.L_PAREN):
3826            aliases = self.expression(
3827                exp.Aliases,
3828                this=this,
3829                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3830            )
3831            self._match_r_paren(aliases)
3832            return aliases
3833
3834        alias = self._parse_id_var(any_token)
3835
3836        if alias:
3837            return self.expression(exp.Alias, this=this, alias=alias)
3838
3839        return this
3840
3841    def _parse_id_var(
3842        self,
3843        any_token: bool = True,
3844        tokens: t.Optional[t.Collection[TokenType]] = None,
3845        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3846    ) -> t.Optional[exp.Expression]:
3847        identifier = self._parse_identifier()
3848
3849        if identifier:
3850            return identifier
3851
3852        prefix = ""
3853
3854        if prefix_tokens:
3855            while self._match_set(prefix_tokens):
3856                prefix += self._prev.text
3857
3858        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3859            quoted = self._prev.token_type == TokenType.STRING
3860            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3861
3862        return None
3863
3864    def _parse_string(self) -> t.Optional[exp.Expression]:
3865        if self._match(TokenType.STRING):
3866            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3867        return self._parse_placeholder()
3868
3869    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3870        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3871
3872    def _parse_number(self) -> t.Optional[exp.Expression]:
3873        if self._match(TokenType.NUMBER):
3874            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3875        return self._parse_placeholder()
3876
3877    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3878        if self._match(TokenType.IDENTIFIER):
3879            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3880        return self._parse_placeholder()
3881
3882    def _parse_var(
3883        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3884    ) -> t.Optional[exp.Expression]:
3885        if (
3886            (any_token and self._advance_any())
3887            or self._match(TokenType.VAR)
3888            or (self._match_set(tokens) if tokens else False)
3889        ):
3890            return self.expression(exp.Var, this=self._prev.text)
3891        return self._parse_placeholder()
3892
3893    def _advance_any(self) -> t.Optional[Token]:
3894        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3895            self._advance()
3896            return self._prev
3897        return None
3898
3899    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3900        return self._parse_var() or self._parse_string()
3901
3902    def _parse_null(self) -> t.Optional[exp.Expression]:
3903        if self._match(TokenType.NULL):
3904            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3905        return None
3906
3907    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3908        if self._match(TokenType.TRUE):
3909            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3910        if self._match(TokenType.FALSE):
3911            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3912        return None
3913
3914    def _parse_star(self) -> t.Optional[exp.Expression]:
3915        if self._match(TokenType.STAR):
3916            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3917        return None
3918
3919    def _parse_parameter(self) -> exp.Expression:
3920        wrapped = self._match(TokenType.L_BRACE)
3921        this = self._parse_var() or self._parse_primary()
3922        self._match(TokenType.R_BRACE)
3923        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3924
3925    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3926        if self._match_set(self.PLACEHOLDER_PARSERS):
3927            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3928            if placeholder:
3929                return placeholder
3930            self._advance(-1)
3931        return None
3932
3933    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3934        if not self._match(TokenType.EXCEPT):
3935            return None
3936        if self._match(TokenType.L_PAREN, advance=False):
3937            return self._parse_wrapped_csv(self._parse_column)
3938        return self._parse_csv(self._parse_column)
3939
3940    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3941        if not self._match(TokenType.REPLACE):
3942            return None
3943        if self._match(TokenType.L_PAREN, advance=False):
3944            return self._parse_wrapped_csv(self._parse_expression)
3945        return self._parse_csv(self._parse_expression)
3946
3947    def _parse_csv(
3948        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3949    ) -> t.List[t.Optional[exp.Expression]]:
3950        parse_result = parse_method()
3951        items = [parse_result] if parse_result is not None else []
3952
3953        while self._match(sep):
3954            if parse_result and self._prev_comments:
3955                parse_result.comments = self._prev_comments
3956
3957            parse_result = parse_method()
3958            if parse_result is not None:
3959                items.append(parse_result)
3960
3961        return items
3962
3963    def _parse_tokens(
3964        self, parse_method: t.Callable, expressions: t.Dict
3965    ) -> t.Optional[exp.Expression]:
3966        this = parse_method()
3967
3968        while self._match_set(expressions):
3969            this = self.expression(
3970                expressions[self._prev.token_type],
3971                this=this,
3972                comments=self._prev_comments,
3973                expression=parse_method(),
3974            )
3975
3976        return this
3977
    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var)
3980
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list enclosed in required parentheses."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3985
3986    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3987        self._match_l_paren()
3988        parse_result = parse_method()
3989        self._match_r_paren()
3990        return parse_result
3991
    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or failing that a bare expression (with any
        trailing set operations applied)."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())
3994
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse a nested SELECT (no subquery alias) plus set operations,
        as used in DDL contexts."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )
3999
4000    def _parse_transaction(self) -> exp.Expression:
4001        this = None
4002        if self._match_texts(self.TRANSACTION_KIND):
4003            this = self._prev.text
4004
4005        self._match_texts({"TRANSACTION", "WORK"})
4006
4007        modes = []
4008        while True:
4009            mode = []
4010            while self._match(TokenType.VAR):
4011                mode.append(self._prev.text)
4012
4013            if mode:
4014                modes.append(" ".join(mode))
4015            if not self._match(TokenType.COMMA):
4016                break
4017
4018        return self.expression(exp.Transaction, this=this, modes=modes)
4019
4020    def _parse_commit_or_rollback(self) -> exp.Expression:
4021        chain = None
4022        savepoint = None
4023        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4024
4025        self._match_texts({"TRANSACTION", "WORK"})
4026
4027        if self._match_text_seq("TO"):
4028            self._match_text_seq("SAVEPOINT")
4029            savepoint = self._parse_id_var()
4030
4031        if self._match(TokenType.AND):
4032            chain = not self._match_text_seq("NO")
4033            self._match_text_seq("CHAIN")
4034
4035        if is_rollback:
4036            return self.expression(exp.Rollback, savepoint=savepoint)
4037        return self.expression(exp.Commit, chain=chain)
4038
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one `ADD [COLUMN] <column def>` clause of an ALTER TABLE.

        Returns the column definition with `exists`/`position` args set when
        present, or None when the next tokens are not an ADD clause.
        """
        if not self._match_text_seq("ADD"):
            return None

        # The COLUMN keyword is optional.
        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4059
4060    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4061        drop = self._match(TokenType.DROP) and self._parse_drop()
4062        if drop and not isinstance(drop, exp.Command):
4063            drop.set("kind", drop.args.get("kind", "COLUMN"))
4064        return drop
4065
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse one or more PARTITION specs into an exp.DropPartition node."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
4071
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint payload of ALTER TABLE ... ADD.

        The introducing token (CONSTRAINT / FOREIGN KEY / PRIMARY KEY) has
        already been consumed by the caller, so its type is read from
        `self._prev`.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> CHECK (...) [ENFORCED]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # Note: a CONSTRAINT <name> that is not a CHECK falls through here,
        # so a named FOREIGN/PRIMARY KEY still gets its name attached.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4095
4096    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4097        index = self._index - 1
4098
4099        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4100            return self._parse_csv(self._parse_add_constraint)
4101
4102        self._retreat(index)
4103        return self._parse_csv(self._parse_add_column)
4104
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <action>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Otherwise: [SET DATA] TYPE <type> [COLLATE ...] [USING ...].
        # Each kwarg below both tests for its keyword and, on a match,
        # consumes the following tokens -- evaluation order matters.
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4122
4123    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4124        index = self._index - 1
4125
4126        partition_exists = self._parse_exists()
4127        if self._match(TokenType.PARTITION, advance=False):
4128            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4129
4130        self._retreat(index)
4131        return self._parse_csv(self._parse_drop_column)
4132
    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4136
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Falls back to an opaque exp.Command when the statement is not
        ALTER TABLE, when no action parser matches, or when tokens remain
        after the actions were parsed.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The action keyword (e.g. ADD/ALTER/DROP/RENAME) selects the parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only succeed when every token was consumed; otherwise fall back.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4161
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement.

        Shape: MERGE INTO <target> USING <source> ON <condition> followed by
        any number of WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND <cond>]
        THEN <insert | update | delete> clauses.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True only for "BY SOURCE"; "BY TARGET" and the
            # unqualified form both yield False.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * -- no explicit column list.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * -- no explicit SET list.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE has no payload; keep the keyword itself as a Var.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4227
4228    def _parse_show(self) -> t.Optional[exp.Expression]:
4229        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4230        if parser:
4231            return parser(self)
4232        self._advance()
4233        return self.expression(exp.Show, this=self._prev.text.upper())
4234
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `<name> = <value>` / `<name> TO <value>` SET item.

        Args:
            kind: optional qualifier (e.g. "GLOBAL" or "SESSION") that the
                caller already consumed.
        """
        index = self._index

        # GLOBAL/SESSION TRANSACTION sets transaction characteristics instead.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment -- rewind so the caller can try other forms.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4261
4262    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4263        self._match_text_seq("TRANSACTION")
4264        characteristics = self._parse_csv(
4265            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4266        )
4267        return self.expression(
4268            exp.SetItem,
4269            expressions=characteristics,
4270            kind="TRANSACTION",
4271            **{"global": global_},  # type: ignore
4272        )
4273
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring dialect-specific SET parsers."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4277
4278    def _parse_set(self) -> exp.Expression:
4279        index = self._index
4280        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4281
4282        if self._curr:
4283            self._retreat(index)
4284            return self._parse_as_command(self._prev)
4285
4286        return set_
4287
4288    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4289        for option in options:
4290            if self._match_text_seq(*option.split(" ")):
4291                return exp.Var(this=option)
4292        return None
4293
4294    def _parse_as_command(self, start: Token) -> exp.Command:
4295        while self._curr:
4296            self._advance()
4297        text = self._find_sql(start, self._prev)
4298        size = len(start.text)
4299        return exp.Command(this=text[:size], expression=text[size:])
4300
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a (possibly multi-word) parser by walking `trie` over the
        upcoming tokens.

        Returns the matching callable from `parsers`, or None with the token
        position restored when no key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No key starts with the words seen so far -- give up.
                break
            if result == 2:
                # Exact match: the accumulated words form the parser key.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4323
4324    def _match(self, token_type, advance=True):
4325        if not self._curr:
4326            return None
4327
4328        if self._curr.token_type == token_type:
4329            if advance:
4330                self._advance()
4331            return True
4332
4333        return None
4334
4335    def _match_set(self, types, advance=True):
4336        if not self._curr:
4337            return None
4338
4339        if self._curr.token_type in types:
4340            if advance:
4341                self._advance()
4342            return True
4343
4344        return None
4345
4346    def _match_pair(self, token_type_a, token_type_b, advance=True):
4347        if not self._curr or not self._next:
4348            return None
4349
4350        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4351            if advance:
4352                self._advance(2)
4353            return True
4354
4355        return None
4356
4357    def _match_l_paren(self, expression=None):
4358        if not self._match(TokenType.L_PAREN):
4359            self.raise_error("Expecting (")
4360        if expression and self._prev_comments:
4361            expression.comments = self._prev_comments
4362
4363    def _match_r_paren(self, expression=None):
4364        if not self._match(TokenType.R_PAREN):
4365            self.raise_error("Expecting )")
4366        if expression and self._prev_comments:
4367            expression.comments = self._prev_comments
4368
4369    def _match_texts(self, texts, advance=True):
4370        if self._curr and self._curr.text.upper() in texts:
4371            if advance:
4372                self._advance()
4373            return True
4374        return False
4375
4376    def _match_text_seq(self, *texts, advance=True):
4377        index = self._index
4378        for text in texts:
4379            if self._curr and self._curr.text.upper() == text:
4380                self._advance()
4381            else:
4382                self._retreat(index)
4383                return False
4384
4385        if not advance:
4386            self._retreat(index)
4387
4388        return True
4389
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        A table-qualified Column becomes Dot(table, column); an unqualified
        Column or a bare Identifier becomes a plain Var.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then the column node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4404
    def _replace_lambda(self, node, lambda_variables):
        """Replace Column references to lambda parameters inside `node`.

        A column whose first part names one of `lambda_variables` is replaced
        by its bare identifier (or a Dot chain when table-qualified).
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the enclosing chain.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # while/else: runs only when the column was not inside a
                    # Dot chain at all -- replace the column node directly.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: Sequence) -> sqlglot.expressions.Expression:
25def parse_var_map(args: t.Sequence) -> exp.Expression:
26    if len(args) == 1 and args[0].is_star:
27        return exp.StarMap(this=args[0])
28
29    keys = []
30    values = []
31    for i in range(0, len(args), 2):
32        keys.append(args[i])
33        values.append(args[i + 1])
34    return exp.VarMap(
35        keys=exp.Array(expressions=keys),
36        values=exp.Array(expressions=values),
37    )
def parse_like(args):
40def parse_like(args):
41    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
42    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
45def binary_range_parser(
46    expr_type: t.Type[exp.Expression],
47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
48    return lambda self, this: self._parse_escape(
49        self.expression(expr_type, this=this, expression=self._parse_bitwise())
50    )
class Parser:
  62class Parser(metaclass=_Parser):
  63    """
  64    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  65    a parsed syntax tree.
  66
  67    Args:
  68        error_level: the desired error level.
  69            Default: ErrorLevel.RAISE
  70        error_message_context: determines the amount of context to capture from a
  71            query string when displaying the error message (in number of characters).
  72            Default: 50.
  73        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  74            Default: 0
  75        alias_post_tablesample: If the table alias comes after tablesample.
  76            Default: False
  77        max_errors: Maximum number of error messages to include in a raised ParseError.
  78            This is only relevant if error_level is ErrorLevel.RAISE.
  79            Default: 3
  80        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  81            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  82            Default: "nulls_are_small"
  83    """
  84
  85    FUNCTIONS: t.Dict[str, t.Callable] = {
  86        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  87        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  88            this=seq_get(args, 0),
  89            to=exp.DataType(this=exp.DataType.Type.TEXT),
  90        ),
  91        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  92        "IFNULL": exp.Coalesce.from_arg_list,
  93        "LIKE": parse_like,
  94        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  95            this=seq_get(args, 0),
  96            to=exp.DataType(this=exp.DataType.Type.TEXT),
  97        ),
  98        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  99            this=exp.Cast(
 100                this=seq_get(args, 0),
 101                to=exp.DataType(this=exp.DataType.Type.TEXT),
 102            ),
 103            start=exp.Literal.number(1),
 104            length=exp.Literal.number(10),
 105        ),
 106        "VAR_MAP": parse_var_map,
 107    }
 108
 109    NO_PAREN_FUNCTIONS = {
 110        TokenType.CURRENT_DATE: exp.CurrentDate,
 111        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 112        TokenType.CURRENT_TIME: exp.CurrentTime,
 113        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 114        TokenType.CURRENT_USER: exp.CurrentUser,
 115    }
 116
 117    JOIN_HINTS: t.Set[str] = set()
 118
 119    NESTED_TYPE_TOKENS = {
 120        TokenType.ARRAY,
 121        TokenType.MAP,
 122        TokenType.STRUCT,
 123        TokenType.NULLABLE,
 124    }
 125
 126    TYPE_TOKENS = {
 127        TokenType.BIT,
 128        TokenType.BOOLEAN,
 129        TokenType.TINYINT,
 130        TokenType.SMALLINT,
 131        TokenType.INT,
 132        TokenType.BIGINT,
 133        TokenType.FLOAT,
 134        TokenType.DOUBLE,
 135        TokenType.CHAR,
 136        TokenType.NCHAR,
 137        TokenType.VARCHAR,
 138        TokenType.NVARCHAR,
 139        TokenType.TEXT,
 140        TokenType.MEDIUMTEXT,
 141        TokenType.LONGTEXT,
 142        TokenType.MEDIUMBLOB,
 143        TokenType.LONGBLOB,
 144        TokenType.BINARY,
 145        TokenType.VARBINARY,
 146        TokenType.JSON,
 147        TokenType.JSONB,
 148        TokenType.INTERVAL,
 149        TokenType.TIME,
 150        TokenType.TIMESTAMP,
 151        TokenType.TIMESTAMPTZ,
 152        TokenType.TIMESTAMPLTZ,
 153        TokenType.DATETIME,
 154        TokenType.DATE,
 155        TokenType.DECIMAL,
 156        TokenType.BIGDECIMAL,
 157        TokenType.UUID,
 158        TokenType.GEOGRAPHY,
 159        TokenType.GEOMETRY,
 160        TokenType.HLLSKETCH,
 161        TokenType.HSTORE,
 162        TokenType.PSEUDO_TYPE,
 163        TokenType.SUPER,
 164        TokenType.SERIAL,
 165        TokenType.SMALLSERIAL,
 166        TokenType.BIGSERIAL,
 167        TokenType.XML,
 168        TokenType.UNIQUEIDENTIFIER,
 169        TokenType.MONEY,
 170        TokenType.SMALLMONEY,
 171        TokenType.ROWVERSION,
 172        TokenType.IMAGE,
 173        TokenType.VARIANT,
 174        TokenType.OBJECT,
 175        TokenType.INET,
 176        *NESTED_TYPE_TOKENS,
 177    }
 178
 179    SUBQUERY_PREDICATES = {
 180        TokenType.ANY: exp.Any,
 181        TokenType.ALL: exp.All,
 182        TokenType.EXISTS: exp.Exists,
 183        TokenType.SOME: exp.Any,
 184    }
 185
 186    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 187
 188    DB_CREATABLES = {
 189        TokenType.DATABASE,
 190        TokenType.SCHEMA,
 191        TokenType.TABLE,
 192        TokenType.VIEW,
 193    }
 194
 195    CREATABLES = {
 196        TokenType.COLUMN,
 197        TokenType.FUNCTION,
 198        TokenType.INDEX,
 199        TokenType.PROCEDURE,
 200        *DB_CREATABLES,
 201    }
 202
 203    ID_VAR_TOKENS = {
 204        TokenType.VAR,
 205        TokenType.ANTI,
 206        TokenType.APPLY,
 207        TokenType.AUTO_INCREMENT,
 208        TokenType.BEGIN,
 209        TokenType.BOTH,
 210        TokenType.BUCKET,
 211        TokenType.CACHE,
 212        TokenType.CASCADE,
 213        TokenType.COLLATE,
 214        TokenType.COMMAND,
 215        TokenType.COMMENT,
 216        TokenType.COMMIT,
 217        TokenType.COMPOUND,
 218        TokenType.CONSTRAINT,
 219        TokenType.DEFAULT,
 220        TokenType.DELETE,
 221        TokenType.DESCRIBE,
 222        TokenType.DIV,
 223        TokenType.END,
 224        TokenType.EXECUTE,
 225        TokenType.ESCAPE,
 226        TokenType.FALSE,
 227        TokenType.FIRST,
 228        TokenType.FILTER,
 229        TokenType.FOLLOWING,
 230        TokenType.FORMAT,
 231        TokenType.FULL,
 232        TokenType.IF,
 233        TokenType.IS,
 234        TokenType.ISNULL,
 235        TokenType.INTERVAL,
 236        TokenType.LAZY,
 237        TokenType.LEADING,
 238        TokenType.LEFT,
 239        TokenType.LOCAL,
 240        TokenType.MATERIALIZED,
 241        TokenType.MERGE,
 242        TokenType.NATURAL,
 243        TokenType.NEXT,
 244        TokenType.OFFSET,
 245        TokenType.ONLY,
 246        TokenType.OPTIONS,
 247        TokenType.ORDINALITY,
 248        TokenType.OVERWRITE,
 249        TokenType.PARTITION,
 250        TokenType.PERCENT,
 251        TokenType.PIVOT,
 252        TokenType.PRAGMA,
 253        TokenType.PRECEDING,
 254        TokenType.RANGE,
 255        TokenType.REFERENCES,
 256        TokenType.RIGHT,
 257        TokenType.ROW,
 258        TokenType.ROWS,
 259        TokenType.SEED,
 260        TokenType.SEMI,
 261        TokenType.SET,
 262        TokenType.SHOW,
 263        TokenType.SORTKEY,
 264        TokenType.TEMPORARY,
 265        TokenType.TOP,
 266        TokenType.TRAILING,
 267        TokenType.TRUE,
 268        TokenType.UNBOUNDED,
 269        TokenType.UNIQUE,
 270        TokenType.UNLOGGED,
 271        TokenType.UNPIVOT,
 272        TokenType.VOLATILE,
 273        TokenType.WINDOW,
 274        *CREATABLES,
 275        *SUBQUERY_PREDICATES,
 276        *TYPE_TOKENS,
 277        *NO_PAREN_FUNCTIONS,
 278    }
 279
 280    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 281
 282    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 283        TokenType.APPLY,
 284        TokenType.FULL,
 285        TokenType.LEFT,
 286        TokenType.NATURAL,
 287        TokenType.OFFSET,
 288        TokenType.RIGHT,
 289        TokenType.WINDOW,
 290    }
 291
 292    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 293
 294    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 295
 296    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 297
 298    FUNC_TOKENS = {
 299        TokenType.COMMAND,
 300        TokenType.CURRENT_DATE,
 301        TokenType.CURRENT_DATETIME,
 302        TokenType.CURRENT_TIMESTAMP,
 303        TokenType.CURRENT_TIME,
 304        TokenType.CURRENT_USER,
 305        TokenType.FILTER,
 306        TokenType.FIRST,
 307        TokenType.FORMAT,
 308        TokenType.GLOB,
 309        TokenType.IDENTIFIER,
 310        TokenType.INDEX,
 311        TokenType.ISNULL,
 312        TokenType.ILIKE,
 313        TokenType.LIKE,
 314        TokenType.MERGE,
 315        TokenType.OFFSET,
 316        TokenType.PRIMARY_KEY,
 317        TokenType.REPLACE,
 318        TokenType.ROW,
 319        TokenType.UNNEST,
 320        TokenType.VAR,
 321        TokenType.LEFT,
 322        TokenType.RIGHT,
 323        TokenType.DATE,
 324        TokenType.DATETIME,
 325        TokenType.TABLE,
 326        TokenType.TIMESTAMP,
 327        TokenType.TIMESTAMPTZ,
 328        TokenType.WINDOW,
 329        *TYPE_TOKENS,
 330        *SUBQUERY_PREDICATES,
 331    }
 332
 333    CONJUNCTION = {
 334        TokenType.AND: exp.And,
 335        TokenType.OR: exp.Or,
 336    }
 337
 338    EQUALITY = {
 339        TokenType.EQ: exp.EQ,
 340        TokenType.NEQ: exp.NEQ,
 341        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 342    }
 343
 344    COMPARISON = {
 345        TokenType.GT: exp.GT,
 346        TokenType.GTE: exp.GTE,
 347        TokenType.LT: exp.LT,
 348        TokenType.LTE: exp.LTE,
 349    }
 350
 351    BITWISE = {
 352        TokenType.AMP: exp.BitwiseAnd,
 353        TokenType.CARET: exp.BitwiseXor,
 354        TokenType.PIPE: exp.BitwiseOr,
 355        TokenType.DPIPE: exp.DPipe,
 356    }
 357
 358    TERM = {
 359        TokenType.DASH: exp.Sub,
 360        TokenType.PLUS: exp.Add,
 361        TokenType.MOD: exp.Mod,
 362        TokenType.COLLATE: exp.Collate,
 363    }
 364
 365    FACTOR = {
 366        TokenType.DIV: exp.IntDiv,
 367        TokenType.LR_ARROW: exp.Distance,
 368        TokenType.SLASH: exp.Div,
 369        TokenType.STAR: exp.Mul,
 370    }
 371
 372    TIMESTAMPS = {
 373        TokenType.TIME,
 374        TokenType.TIMESTAMP,
 375        TokenType.TIMESTAMPTZ,
 376        TokenType.TIMESTAMPLTZ,
 377    }
 378
 379    SET_OPERATIONS = {
 380        TokenType.UNION,
 381        TokenType.INTERSECT,
 382        TokenType.EXCEPT,
 383    }
 384
 385    JOIN_SIDES = {
 386        TokenType.LEFT,
 387        TokenType.RIGHT,
 388        TokenType.FULL,
 389    }
 390
 391    JOIN_KINDS = {
 392        TokenType.INNER,
 393        TokenType.OUTER,
 394        TokenType.CROSS,
 395        TokenType.SEMI,
 396        TokenType.ANTI,
 397    }
 398
    # Parsers for lambda-like constructs: `args -> body` builds an exp.Lambda
    # (its parameter names are substituted into the body via _replace_lambda),
    # while `name => value` builds an exp.Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
 414
    # Operators that may follow a column expression. DOT maps to None because
    # attribute access is handled inline by the column parser instead of a
    # callback. DCOLON performs a cast whose strictness depends on STRICT_CAST;
    # the arrow/hash-arrow entries build JSON(B) extraction expressions.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        # The placeholder token here builds a JSONB containment expression.
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 448
    # Maps a target Expression type to the bound parser used by `parse_into`
    # when asked to parse a token stream into that specific type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        # String key: a pseudo-target with no dedicated Expression class.
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }
 477
    # Top-level statement dispatch table, keyed by the statement's leading
    # token type (see `_parse_statement`).
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE|WAREHOUSE|DATABASE|SCHEMA] <name>; the optional kind word
        # is captured as an exp.Var when present.
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 505
    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary (leaf) expressions: literals, star, NULL, booleans,
    # typed strings, introducers and session parameters. Each callback
    # receives the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
 534
    # Parsers for placeholder tokens (?, parameters, :name-style bindings).
    # The COLON entry only yields a placeholder when followed by a number or
    # a variable token; otherwise it returns None.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range-style infix operators (BETWEEN, IN, IS, LIKE-family, etc.); most
    # use binary_range_parser to also pick up a trailing ESCAPE clause.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 555
    # CREATE-statement property parsers, keyed by the (uppercase) keyword
    # text that introduces the property. Several entries inspect
    # self._prev.text to distinguish NO/DUAL/DEFAULT-prefixed variants.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        # MAX/MAXIMUM/MIN/MINIMUM all feed into datablocksize parsing.
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
 634
    # Column-constraint parsers, keyed by the introducing keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON only forms a constraint when followed by UPDATE.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
 677
    # ALTER TABLE action dispatch, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint keywords that may appear unnamed inside a schema definition.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 687
    # Function-like constructs that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need dedicated parsing (special syntax
    # such as CAST(x AS type), EXTRACT(part FROM expr), TRIM(... FROM ...)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
 709
    # Parsers for the optional clauses that can follow a query; the keys are
    # the arg names set on the query expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes; TRANSACTION has dedicated handling.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }
 733
    # Empty by default; dialect subclasses populate these. The metaclass
    # builds tries from SHOW_PARSERS/SET_PARSERS keys (see _Parser.__new__).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Optional per-type literal parsers, keyed by DataType.Type.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression classes that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Recognized BEGIN <kind> transaction kinds.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Recognized SET TRANSACTION characteristics phrases.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict actions.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that may follow ALTER TABLE ... ADD when adding a constraint.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # When True, CAST produces exp.Cast; otherwise exp.TryCast
    # (see the DCOLON entry in COLUMN_OPERATORS and FUNCTION_PARSERS["CAST"]).
    STRICT_CAST = True

    # Dialect knob: whether CONVERT takes the type as its first argument.
    CONVERT_TYPE_FIRST = False

    # Dialect knobs for PIVOT column handling.
    QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None
    PREFIXED_PIVOT_COLUMNS = False

    # Dialect knobs for LOG(): argument order and single-argument meaning.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
 766
    # Per-instance attributes; slots keep the many short-lived parser
    # instances small and catch typo'd attribute assignments.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 786
 787    def __init__(
 788        self,
 789        error_level: t.Optional[ErrorLevel] = None,
 790        error_message_context: int = 100,
 791        index_offset: int = 0,
 792        unnest_column_only: bool = False,
 793        alias_post_tablesample: bool = False,
 794        max_errors: int = 3,
 795        null_ordering: t.Optional[str] = None,
 796    ):
 797        self.error_level = error_level or ErrorLevel.IMMEDIATE
 798        self.error_message_context = error_message_context
 799        self.index_offset = index_offset
 800        self.unnest_column_only = unnest_column_only
 801        self.alias_post_tablesample = alias_post_tablesample
 802        self.max_errors = max_errors
 803        self.null_ordering = null_ordering
 804        self.reset()
 805
 806    def reset(self):
 807        self.sql = ""
 808        self.errors = []
 809        self._tokens = []
 810        self._index = 0
 811        self._curr = None
 812        self._next = None
 813        self._prev = None
 814        self._prev_comments = None
 815
 816    def parse(
 817        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 818    ) -> t.List[t.Optional[exp.Expression]]:
 819        """
 820        Parses a list of tokens and returns a list of syntax trees, one tree
 821        per parsed SQL statement.
 822
 823        Args:
 824            raw_tokens: the list of tokens.
 825            sql: the original SQL string, used to produce helpful debug messages.
 826
 827        Returns:
 828            The list of syntax trees.
 829        """
 830        return self._parse(
 831            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 832        )
 833
 834    def parse_into(
 835        self,
 836        expression_types: exp.IntoType,
 837        raw_tokens: t.List[Token],
 838        sql: t.Optional[str] = None,
 839    ) -> t.List[t.Optional[exp.Expression]]:
 840        """
 841        Parses a list of tokens into a given Expression type. If a collection of Expression
 842        types is given instead, this method will try to parse the token list into each one
 843        of them, stopping at the first for which the parsing succeeds.
 844
 845        Args:
 846            expression_types: the expression type(s) to try and parse the token list into.
 847            raw_tokens: the list of tokens.
 848            sql: the original SQL string, used to produce helpful debug messages.
 849
 850        Returns:
 851            The target Expression.
 852        """
 853        errors = []
 854        for expression_type in ensure_collection(expression_types):
 855            parser = self.EXPRESSION_PARSERS.get(expression_type)
 856            if not parser:
 857                raise TypeError(f"No parser registered for {expression_type}")
 858            try:
 859                return self._parse(parser, raw_tokens, sql)
 860            except ParseError as e:
 861                e.errors[0]["into_expression"] = expression_type
 862                errors.append(e)
 863        raise ParseError(
 864            f"Failed to parse into {expression_types}",
 865            errors=merge_errors(errors),
 866        ) from errors[-1]
 867
 868    def _parse(
 869        self,
 870        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 871        raw_tokens: t.List[Token],
 872        sql: t.Optional[str] = None,
 873    ) -> t.List[t.Optional[exp.Expression]]:
 874        self.reset()
 875        self.sql = sql or ""
 876        total = len(raw_tokens)
 877        chunks: t.List[t.List[Token]] = [[]]
 878
 879        for i, token in enumerate(raw_tokens):
 880            if token.token_type == TokenType.SEMICOLON:
 881                if i < total - 1:
 882                    chunks.append([])
 883            else:
 884                chunks[-1].append(token)
 885
 886        expressions = []
 887
 888        for tokens in chunks:
 889            self._index = -1
 890            self._tokens = tokens
 891            self._advance()
 892
 893            expressions.append(parse_method(self))
 894
 895            if self._index < len(self._tokens):
 896                self.raise_error("Invalid expression / Unexpected token")
 897
 898            self.check_errors()
 899
 900        return expressions
 901
 902    def check_errors(self) -> None:
 903        """
 904        Logs or raises any found errors, depending on the chosen error level setting.
 905        """
 906        if self.error_level == ErrorLevel.WARN:
 907            for error in self.errors:
 908                logger.error(str(error))
 909        elif self.error_level == ErrorLevel.RAISE and self.errors:
 910            raise ParseError(
 911                concat_messages(self.errors, self.max_errors),
 912                errors=merge_errors(self.errors),
 913            )
 914
 915    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 916        """
 917        Appends an error in the list of recorded errors or raises it, depending on the chosen
 918        error level setting.
 919        """
 920        token = token or self._curr or self._prev or Token.string("")
 921        start = token.start
 922        end = token.end
 923        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 924        highlight = self.sql[start:end]
 925        end_context = self.sql[end : end + self.error_message_context]
 926
 927        error = ParseError.new(
 928            f"{message}. Line {token.line}, Col: {token.col}.\n"
 929            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 930            description=message,
 931            line=token.line,
 932            col=token.col,
 933            start_context=start_context,
 934            highlight=highlight,
 935            end_context=end_context,
 936        )
 937
 938        if self.error_level == ErrorLevel.IMMEDIATE:
 939            raise error
 940
 941        self.errors.append(error)
 942
 943    def expression(
 944        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 945    ) -> E:
 946        """
 947        Creates a new, validated Expression.
 948
 949        Args:
 950            exp_class: the expression class to instantiate.
 951            comments: an optional list of comments to attach to the expression.
 952            kwargs: the arguments to set for the expression along with their respective values.
 953
 954        Returns:
 955            The target expression.
 956        """
 957        instance = exp_class(**kwargs)
 958        if self._prev_comments:
 959            instance.comments = self._prev_comments
 960            self._prev_comments = None
 961        if comments:
 962            instance.comments = comments
 963        self.validate_expression(instance)
 964        return instance
 965
 966    def validate_expression(
 967        self, expression: exp.Expression, args: t.Optional[t.List] = None
 968    ) -> None:
 969        """
 970        Validates an already instantiated expression, making sure that all its mandatory arguments
 971        are set.
 972
 973        Args:
 974            expression: the expression to validate.
 975            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 976        """
 977        if self.error_level == ErrorLevel.IGNORE:
 978            return
 979
 980        for error_message in expression.error_messages(args):
 981            self.raise_error(error_message)
 982
 983    def _find_sql(self, start: Token, end: Token) -> str:
 984        return self.sql[start.start : end.end]
 985
 986    def _advance(self, times: int = 1) -> None:
 987        self._index += times
 988        self._curr = seq_get(self._tokens, self._index)
 989        self._next = seq_get(self._tokens, self._index + 1)
 990        if self._index > 0:
 991            self._prev = self._tokens[self._index - 1]
 992            self._prev_comments = self._prev.comments
 993        else:
 994            self._prev = None
 995            self._prev_comments = None
 996
 997    def _retreat(self, index: int) -> None:
 998        if index != self._index:
 999            self._advance(index - self._index)
1000
1001    def _parse_command(self) -> exp.Command:
1002        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1003
1004    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1005        start = self._prev
1006        exists = self._parse_exists() if allow_exists else None
1007
1008        self._match(TokenType.ON)
1009
1010        kind = self._match_set(self.CREATABLES) and self._prev
1011
1012        if not kind:
1013            return self._parse_as_command(start)
1014
1015        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1016            this = self._parse_user_defined_function(kind=kind.token_type)
1017        elif kind.token_type == TokenType.TABLE:
1018            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1019        elif kind.token_type == TokenType.COLUMN:
1020            this = self._parse_column()
1021        else:
1022            this = self._parse_id_var()
1023
1024        self._match(TokenType.IS)
1025
1026        return self.expression(
1027            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1028        )
1029
1030    def _parse_statement(self) -> t.Optional[exp.Expression]:
1031        if self._curr is None:
1032            return None
1033
1034        if self._match_set(self.STATEMENT_PARSERS):
1035            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1036
1037        if self._match_set(Tokenizer.COMMANDS):
1038            return self._parse_command()
1039
1040        expression = self._parse_expression()
1041        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1042
1043        self._parse_query_modifiers(expression)
1044        return expression
1045
1046    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1047        start = self._prev
1048        temporary = self._match(TokenType.TEMPORARY)
1049        materialized = self._match(TokenType.MATERIALIZED)
1050        kind = self._match_set(self.CREATABLES) and self._prev.text
1051        if not kind:
1052            return self._parse_as_command(start)
1053
1054        return self.expression(
1055            exp.Drop,
1056            exists=self._parse_exists(),
1057            this=self._parse_table(schema=True),
1058            kind=kind,
1059            temporary=temporary,
1060            materialized=materialized,
1061            cascade=self._match(TokenType.CASCADE),
1062            constraints=self._match_text_seq("CONSTRAINTS"),
1063            purge=self._match_text_seq("PURGE"),
1064        )
1065
1066    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1067        return (
1068            self._match(TokenType.IF)
1069            and (not not_ or self._match(TokenType.NOT))
1070            and self._match(TokenType.EXISTS)
1071        )
1072
1073    def _parse_create(self) -> t.Optional[exp.Expression]:
1074        start = self._prev
1075        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1076            TokenType.OR, TokenType.REPLACE
1077        )
1078        unique = self._match(TokenType.UNIQUE)
1079
1080        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1081            self._match(TokenType.TABLE)
1082
1083        properties = None
1084        create_token = self._match_set(self.CREATABLES) and self._prev
1085
1086        if not create_token:
1087            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1088            create_token = self._match_set(self.CREATABLES) and self._prev
1089
1090            if not properties or not create_token:
1091                return self._parse_as_command(start)
1092
1093        exists = self._parse_exists(not_=True)
1094        this = None
1095        expression = None
1096        indexes = None
1097        no_schema_binding = None
1098        begin = None
1099
1100        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1101            this = self._parse_user_defined_function(kind=create_token.token_type)
1102            temp_properties = self._parse_properties()
1103            if properties and temp_properties:
1104                properties.expressions.extend(temp_properties.expressions)
1105            elif temp_properties:
1106                properties = temp_properties
1107
1108            self._match(TokenType.ALIAS)
1109            begin = self._match(TokenType.BEGIN)
1110            return_ = self._match_text_seq("RETURN")
1111            expression = self._parse_statement()
1112
1113            if return_:
1114                expression = self.expression(exp.Return, this=expression)
1115        elif create_token.token_type == TokenType.INDEX:
1116            this = self._parse_index()
1117        elif create_token.token_type in self.DB_CREATABLES:
1118            table_parts = self._parse_table_parts(schema=True)
1119
1120            # exp.Properties.Location.POST_NAME
1121            if self._match(TokenType.COMMA):
1122                temp_properties = self._parse_properties(before=True)
1123                if properties and temp_properties:
1124                    properties.expressions.extend(temp_properties.expressions)
1125                elif temp_properties:
1126                    properties = temp_properties
1127
1128            this = self._parse_schema(this=table_parts)
1129
1130            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1131            temp_properties = self._parse_properties()
1132            if properties and temp_properties:
1133                properties.expressions.extend(temp_properties.expressions)
1134            elif temp_properties:
1135                properties = temp_properties
1136
1137            self._match(TokenType.ALIAS)
1138
1139            # exp.Properties.Location.POST_ALIAS
1140            if not (
1141                self._match(TokenType.SELECT, advance=False)
1142                or self._match(TokenType.WITH, advance=False)
1143                or self._match(TokenType.L_PAREN, advance=False)
1144            ):
1145                temp_properties = self._parse_properties()
1146                if properties and temp_properties:
1147                    properties.expressions.extend(temp_properties.expressions)
1148                elif temp_properties:
1149                    properties = temp_properties
1150
1151            expression = self._parse_ddl_select()
1152
1153            if create_token.token_type == TokenType.TABLE:
1154                # exp.Properties.Location.POST_EXPRESSION
1155                temp_properties = self._parse_properties()
1156                if properties and temp_properties:
1157                    properties.expressions.extend(temp_properties.expressions)
1158                elif temp_properties:
1159                    properties = temp_properties
1160
1161                indexes = []
1162                while True:
1163                    index = self._parse_create_table_index()
1164
1165                    # exp.Properties.Location.POST_INDEX
1166                    if self._match(TokenType.PARTITION_BY, advance=False):
1167                        temp_properties = self._parse_properties()
1168                        if properties and temp_properties:
1169                            properties.expressions.extend(temp_properties.expressions)
1170                        elif temp_properties:
1171                            properties = temp_properties
1172
1173                    if not index:
1174                        break
1175                    else:
1176                        indexes.append(index)
1177            elif create_token.token_type == TokenType.VIEW:
1178                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1179                    no_schema_binding = True
1180
1181        return self.expression(
1182            exp.Create,
1183            this=this,
1184            kind=create_token.text,
1185            replace=replace,
1186            unique=unique,
1187            expression=expression,
1188            exists=exists,
1189            properties=properties,
1190            indexes=indexes,
1191            no_schema_binding=no_schema_binding,
1192            begin=begin,
1193        )
1194
1195    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1196        self._match(TokenType.COMMA)
1197
1198        # parsers look to _prev for no/dual/default, so need to consume first
1199        self._match_text_seq("NO")
1200        self._match_text_seq("DUAL")
1201        self._match_text_seq("DEFAULT")
1202
1203        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1204            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1205
1206        return None
1207
1208    def _parse_property(self) -> t.Optional[exp.Expression]:
1209        if self._match_texts(self.PROPERTY_PARSERS):
1210            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1211
1212        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1213            return self._parse_character_set(default=True)
1214
1215        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1216            return self._parse_sortkey(compound=True)
1217
1218        if self._match_text_seq("SQL", "SECURITY"):
1219            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1220
1221        assignment = self._match_pair(
1222            TokenType.VAR, TokenType.EQ, advance=False
1223        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1224
1225        if assignment:
1226            key = self._parse_var_or_string()
1227            self._match(TokenType.EQ)
1228            return self.expression(exp.Property, this=key, value=self._parse_column())
1229
1230        return None
1231
1232    def _parse_stored(self) -> exp.Expression:
1233        self._match(TokenType.ALIAS)
1234
1235        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1236        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1237
1238        return self.expression(
1239            exp.FileFormatProperty,
1240            this=self.expression(
1241                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1242            )
1243            if input_format or output_format
1244            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1245        )
1246
1247    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1248        self._match(TokenType.EQ)
1249        self._match(TokenType.ALIAS)
1250        return self.expression(
1251            exp_class,
1252            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1253        )
1254
1255    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1256        properties = []
1257
1258        while True:
1259            if before:
1260                identified_property = self._parse_property_before()
1261            else:
1262                identified_property = self._parse_property()
1263
1264            if not identified_property:
1265                break
1266            for p in ensure_list(identified_property):
1267                properties.append(p)
1268
1269        if properties:
1270            return self.expression(exp.Properties, expressions=properties)
1271
1272        return None
1273
1274    def _parse_fallback(self, no=False) -> exp.Expression:
1275        self._match_text_seq("FALLBACK")
1276        return self.expression(
1277            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1278        )
1279
1280    def _parse_volatile_property(self) -> exp.Expression:
1281        if self._index >= 2:
1282            pre_volatile_token = self._tokens[self._index - 2]
1283        else:
1284            pre_volatile_token = None
1285
1286        if pre_volatile_token and pre_volatile_token.token_type in (
1287            TokenType.CREATE,
1288            TokenType.REPLACE,
1289            TokenType.UNIQUE,
1290        ):
1291            return exp.VolatileProperty()
1292
1293        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1294
1295    def _parse_with_property(
1296        self,
1297    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1298        self._match(TokenType.WITH)
1299        if self._match(TokenType.L_PAREN, advance=False):
1300            return self._parse_wrapped_csv(self._parse_property)
1301
1302        if self._match_text_seq("JOURNAL"):
1303            return self._parse_withjournaltable()
1304
1305        if self._match_text_seq("DATA"):
1306            return self._parse_withdata(no=False)
1307        elif self._match_text_seq("NO", "DATA"):
1308            return self._parse_withdata(no=True)
1309
1310        if not self._next:
1311            return None
1312
1313        return self._parse_withisolatedloading()
1314
1315    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1316    def _parse_definer(self) -> t.Optional[exp.Expression]:
1317        self._match(TokenType.EQ)
1318
1319        user = self._parse_id_var()
1320        self._match(TokenType.PARAMETER)
1321        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1322
1323        if not user or not host:
1324            return None
1325
1326        return exp.DefinerProperty(this=f"{user}@{host}")
1327
1328    def _parse_withjournaltable(self) -> exp.Expression:
1329        self._match(TokenType.TABLE)
1330        self._match(TokenType.EQ)
1331        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1332
    def _parse_log(self, no=False) -> exp.Expression:
        """Parse a [NO] LOG property; `no` is set by the caller that consumed NO."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1336
1337    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1338        before = self._match_text_seq("BEFORE")
1339        self._match_text_seq("JOURNAL")
1340        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1341
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parse an AFTER JOURNAL property; NOT/LOCAL prefixes are consumed if
        present, and the `no`/`dual`/`local` flags come from the caller."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1347
1348    def _parse_checksum(self) -> exp.Expression:
1349        self._match_text_seq("CHECKSUM")
1350        self._match(TokenType.EQ)
1351
1352        on = None
1353        if self._match(TokenType.ON):
1354            on = True
1355        elif self._match_text_seq("OFF"):
1356            on = False
1357        default = self._match(TokenType.DEFAULT)
1358
1359        return self.expression(
1360            exp.ChecksumProperty,
1361            on=on,
1362            default=default,
1363        )
1364
1365    def _parse_freespace(self) -> exp.Expression:
1366        self._match_text_seq("FREESPACE")
1367        self._match(TokenType.EQ)
1368        return self.expression(
1369            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1370        )
1371
1372    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1373        self._match_text_seq("MERGEBLOCKRATIO")
1374        if self._match(TokenType.EQ):
1375            return self.expression(
1376                exp.MergeBlockRatioProperty,
1377                this=self._parse_number(),
1378                percent=self._match(TokenType.PERCENT),
1379            )
1380        else:
1381            return self.expression(
1382                exp.MergeBlockRatioProperty,
1383                no=no,
1384                default=default,
1385            )
1386
1387    def _parse_datablocksize(self, default=None) -> exp.Expression:
1388        if default:
1389            self._match_text_seq("DATABLOCKSIZE")
1390            return self.expression(exp.DataBlocksizeProperty, default=True)
1391        elif self._match_texts(("MIN", "MINIMUM")):
1392            self._match_text_seq("DATABLOCKSIZE")
1393            return self.expression(exp.DataBlocksizeProperty, min=True)
1394        elif self._match_texts(("MAX", "MAXIMUM")):
1395            self._match_text_seq("DATABLOCKSIZE")
1396            return self.expression(exp.DataBlocksizeProperty, min=False)
1397
1398        self._match_text_seq("DATABLOCKSIZE")
1399        self._match(TokenType.EQ)
1400        size = self._parse_number()
1401        units = None
1402        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1403            units = self._prev.text
1404        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1405
1406    def _parse_blockcompression(self) -> exp.Expression:
1407        self._match_text_seq("BLOCKCOMPRESSION")
1408        self._match(TokenType.EQ)
1409        always = self._match_text_seq("ALWAYS")
1410        manual = self._match_text_seq("MANUAL")
1411        never = self._match_text_seq("NEVER")
1412        default = self._match_text_seq("DEFAULT")
1413        autotemp = None
1414        if self._match_text_seq("AUTOTEMP"):
1415            autotemp = self._parse_schema()
1416
1417        return self.expression(
1418            exp.BlockCompressionProperty,
1419            always=always,
1420            manual=manual,
1421            never=never,
1422            default=default,
1423            autotemp=autotemp,
1424        )
1425
1426    def _parse_withisolatedloading(self) -> exp.Expression:
1427        no = self._match_text_seq("NO")
1428        concurrent = self._match_text_seq("CONCURRENT")
1429        self._match_text_seq("ISOLATED", "LOADING")
1430        for_all = self._match_text_seq("FOR", "ALL")
1431        for_insert = self._match_text_seq("FOR", "INSERT")
1432        for_none = self._match_text_seq("FOR", "NONE")
1433        return self.expression(
1434            exp.IsolatedLoadingProperty,
1435            no=no,
1436            concurrent=concurrent,
1437            for_all=for_all,
1438            for_insert=for_insert,
1439            for_none=for_none,
1440        )
1441
1442    def _parse_locking(self) -> exp.Expression:
1443        if self._match(TokenType.TABLE):
1444            kind = "TABLE"
1445        elif self._match(TokenType.VIEW):
1446            kind = "VIEW"
1447        elif self._match(TokenType.ROW):
1448            kind = "ROW"
1449        elif self._match_text_seq("DATABASE"):
1450            kind = "DATABASE"
1451        else:
1452            kind = None
1453
1454        if kind in ("DATABASE", "TABLE", "VIEW"):
1455            this = self._parse_table_parts()
1456        else:
1457            this = None
1458
1459        if self._match(TokenType.FOR):
1460            for_or_in = "FOR"
1461        elif self._match(TokenType.IN):
1462            for_or_in = "IN"
1463        else:
1464            for_or_in = None
1465
1466        if self._match_text_seq("ACCESS"):
1467            lock_type = "ACCESS"
1468        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1469            lock_type = "EXCLUSIVE"
1470        elif self._match_text_seq("SHARE"):
1471            lock_type = "SHARE"
1472        elif self._match_text_seq("READ"):
1473            lock_type = "READ"
1474        elif self._match_text_seq("WRITE"):
1475            lock_type = "WRITE"
1476        elif self._match_text_seq("CHECKSUM"):
1477            lock_type = "CHECKSUM"
1478        else:
1479            lock_type = None
1480
1481        override = self._match_text_seq("OVERRIDE")
1482
1483        return self.expression(
1484            exp.LockingProperty,
1485            this=this,
1486            kind=kind,
1487            for_or_in=for_or_in,
1488            lock_type=lock_type,
1489            override=override,
1490        )
1491
1492    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1493        if self._match(TokenType.PARTITION_BY):
1494            return self._parse_csv(self._parse_conjunction)
1495        return []
1496
1497    def _parse_partitioned_by(self) -> exp.Expression:
1498        self._match(TokenType.EQ)
1499        return self.expression(
1500            exp.PartitionedByProperty,
1501            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1502        )
1503
1504    def _parse_withdata(self, no=False) -> exp.Expression:
1505        if self._match_text_seq("AND", "STATISTICS"):
1506            statistics = True
1507        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1508            statistics = False
1509        else:
1510            statistics = None
1511
1512        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1513
    def _parse_noprimaryindex(self) -> exp.Expression:
        """Parse NO PRIMARY INDEX (NO was consumed by the caller)."""
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()
1517
    def _parse_oncommit(self) -> exp.Expression:
        """Parse ON COMMIT PRESERVE ROWS (ON was consumed by the caller)."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()
1521
1522    def _parse_distkey(self) -> exp.Expression:
1523        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1524
1525    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1526        table = self._parse_table(schema=True)
1527        options = []
1528        while self._match_texts(("INCLUDING", "EXCLUDING")):
1529            this = self._prev.text.upper()
1530            id_var = self._parse_id_var()
1531
1532            if not id_var:
1533                return None
1534
1535            options.append(
1536                self.expression(
1537                    exp.Property,
1538                    this=this,
1539                    value=exp.Var(this=id_var.this.upper()),
1540                )
1541            )
1542        return self.expression(exp.LikeProperty, this=table, expressions=options)
1543
1544    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1545        return self.expression(
1546            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1547        )
1548
1549    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1550        self._match(TokenType.EQ)
1551        return self.expression(
1552            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1553        )
1554
1555    def _parse_returns(self) -> exp.Expression:
1556        value: t.Optional[exp.Expression]
1557        is_table = self._match(TokenType.TABLE)
1558
1559        if is_table:
1560            if self._match(TokenType.LT):
1561                value = self.expression(
1562                    exp.Schema,
1563                    this="TABLE",
1564                    expressions=self._parse_csv(self._parse_struct_kwargs),
1565                )
1566                if not self._match(TokenType.GT):
1567                    self.raise_error("Expecting >")
1568            else:
1569                value = self._parse_schema(exp.Var(this="TABLE"))
1570        else:
1571            value = self._parse_types()
1572
1573        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1574
    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Build a TemporaryProperty; `global_` is set by the GLOBAL caller."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1578
1579    def _parse_describe(self) -> exp.Expression:
1580        kind = self._match_set(self.CREATABLES) and self._prev.text
1581        this = self._parse_table()
1582
1583        return self.expression(exp.Describe, this=this, kind=kind)
1584
1585    def _parse_insert(self) -> exp.Expression:
1586        overwrite = self._match(TokenType.OVERWRITE)
1587        local = self._match(TokenType.LOCAL)
1588        alternative = None
1589
1590        if self._match_text_seq("DIRECTORY"):
1591            this: t.Optional[exp.Expression] = self.expression(
1592                exp.Directory,
1593                this=self._parse_var_or_string(),
1594                local=local,
1595                row_format=self._parse_row_format(match_row=True),
1596            )
1597        else:
1598            if self._match(TokenType.OR):
1599                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1600
1601            self._match(TokenType.INTO)
1602            self._match(TokenType.TABLE)
1603            this = self._parse_table(schema=True)
1604
1605        return self.expression(
1606            exp.Insert,
1607            this=this,
1608            exists=self._parse_exists(),
1609            partition=self._parse_partition(),
1610            expression=self._parse_ddl_select(),
1611            conflict=self._parse_on_conflict(),
1612            returning=self._parse_returning(),
1613            overwrite=overwrite,
1614            alternative=alternative,
1615        )
1616
1617    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
1618        conflict = self._match_text_seq("ON", "CONFLICT")
1619        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1620
1621        if not (conflict or duplicate):
1622            return None
1623
1624        nothing = None
1625        expressions = None
1626        key = None
1627        constraint = None
1628
1629        if conflict:
1630            if self._match_text_seq("ON", "CONSTRAINT"):
1631                constraint = self._parse_id_var()
1632            else:
1633                key = self._parse_csv(self._parse_value)
1634
1635        self._match_text_seq("DO")
1636        if self._match_text_seq("NOTHING"):
1637            nothing = True
1638        else:
1639            self._match(TokenType.UPDATE)
1640            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1641
1642        return self.expression(
1643            exp.OnConflict,
1644            duplicate=duplicate,
1645            expressions=expressions,
1646            nothing=nothing,
1647            key=key,
1648            constraint=constraint,
1649        )
1650
1651    def _parse_returning(self) -> t.Optional[exp.Expression]:
1652        if not self._match(TokenType.RETURNING):
1653            return None
1654
1655        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1656
1657    def _parse_row(self) -> t.Optional[exp.Expression]:
1658        if not self._match(TokenType.FORMAT):
1659            return None
1660        return self._parse_row_format()
1661
1662    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1663        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1664            return None
1665
1666        if self._match_text_seq("SERDE"):
1667            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1668
1669        self._match_text_seq("DELIMITED")
1670
1671        kwargs = {}
1672
1673        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1674            kwargs["fields"] = self._parse_string()
1675            if self._match_text_seq("ESCAPED", "BY"):
1676                kwargs["escaped"] = self._parse_string()
1677        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1678            kwargs["collection_items"] = self._parse_string()
1679        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1680            kwargs["map_keys"] = self._parse_string()
1681        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1682            kwargs["lines"] = self._parse_string()
1683        if self._match_text_seq("NULL", "DEFINED", "AS"):
1684            kwargs["null"] = self._parse_string()
1685
1686        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1687
1688    def _parse_load_data(self) -> exp.Expression:
1689        local = self._match(TokenType.LOCAL)
1690        self._match_text_seq("INPATH")
1691        inpath = self._parse_string()
1692        overwrite = self._match(TokenType.OVERWRITE)
1693        self._match_pair(TokenType.INTO, TokenType.TABLE)
1694
1695        return self.expression(
1696            exp.LoadData,
1697            this=self._parse_table(schema=True),
1698            local=local,
1699            overwrite=overwrite,
1700            inpath=inpath,
1701            partition=self._parse_partition(),
1702            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1703            serde=self._match_text_seq("SERDE") and self._parse_string(),
1704        )
1705
1706    def _parse_delete(self) -> exp.Expression:
1707        self._match(TokenType.FROM)
1708
1709        return self.expression(
1710            exp.Delete,
1711            this=self._parse_table(),
1712            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1713            where=self._parse_where(),
1714            returning=self._parse_returning(),
1715        )
1716
1717    def _parse_update(self) -> exp.Expression:
1718        return self.expression(
1719            exp.Update,
1720            **{  # type: ignore
1721                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1722                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1723                "from": self._parse_from(),
1724                "where": self._parse_where(),
1725                "returning": self._parse_returning(),
1726            },
1727        )
1728
1729    def _parse_uncache(self) -> exp.Expression:
1730        if not self._match(TokenType.TABLE):
1731            self.raise_error("Expecting TABLE after UNCACHE")
1732
1733        return self.expression(
1734            exp.Uncache,
1735            exists=self._parse_exists(),
1736            this=self._parse_table(schema=True),
1737        )
1738
1739    def _parse_cache(self) -> exp.Expression:
1740        lazy = self._match(TokenType.LAZY)
1741        self._match(TokenType.TABLE)
1742        table = self._parse_table(schema=True)
1743        options = []
1744
1745        if self._match(TokenType.OPTIONS):
1746            self._match_l_paren()
1747            k = self._parse_string()
1748            self._match(TokenType.EQ)
1749            v = self._parse_string()
1750            options = [k, v]
1751            self._match_r_paren()
1752
1753        self._match(TokenType.ALIAS)
1754        return self.expression(
1755            exp.Cache,
1756            this=table,
1757            lazy=lazy,
1758            options=options,
1759            expression=self._parse_select(nested=True),
1760        )
1761
1762    def _parse_partition(self) -> t.Optional[exp.Expression]:
1763        if not self._match(TokenType.PARTITION):
1764            return None
1765
1766        return self.expression(
1767            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1768        )
1769
1770    def _parse_value(self) -> exp.Expression:
1771        if self._match(TokenType.L_PAREN):
1772            expressions = self._parse_csv(self._parse_conjunction)
1773            self._match_r_paren()
1774            return self.expression(exp.Tuple, expressions=expressions)
1775
1776        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1777        # Source: https://prestodb.io/docs/current/sql/values.html
1778        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1779
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query expression at the current position.

        Handles, in order: a WITH (CTE) prefix followed by a statement, a plain
        SELECT, a parenthesized nested query or table (when `nested`/`table`
        allow it), and VALUES. Trailing set operations are folded in at the end.

        Args:
            nested: permit a parenthesized subquery at this position.
            table: permit a parenthesized table reference at this position.
            parse_subquery_alias: parse a trailing alias after a subquery.

        Returns:
            The parsed expression, or None when nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # NOTE(review): reached only when raise_error does not raise
                # (lenient error level) — presumably; confirm against ErrorLevel.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            # SELECT AS STRUCT / SELECT AS VALUE — kind is the matched keyword text
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                # DISTINCT [ON (expr, ...)]
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1859
1860    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1861        if not skip_with_token and not self._match(TokenType.WITH):
1862            return None
1863
1864        comments = self._prev_comments
1865        recursive = self._match(TokenType.RECURSIVE)
1866
1867        expressions = []
1868        while True:
1869            expressions.append(self._parse_cte())
1870
1871            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1872                break
1873            else:
1874                self._match(TokenType.WITH)
1875
1876        return self.expression(
1877            exp.With, comments=comments, expressions=expressions, recursive=recursive
1878        )
1879
1880    def _parse_cte(self) -> exp.Expression:
1881        alias = self._parse_table_alias()
1882        if not alias or not alias.this:
1883            self.raise_error("Expected CTE to have alias")
1884
1885        self._match(TokenType.ALIAS)
1886
1887        return self.expression(
1888            exp.CTE,
1889            this=self._parse_wrapped(self._parse_statement),
1890            alias=alias,
1891        )
1892
1893    def _parse_table_alias(
1894        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1895    ) -> t.Optional[exp.Expression]:
1896        any_token = self._match(TokenType.ALIAS)
1897        alias = (
1898            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1899            or self._parse_string_as_identifier()
1900        )
1901
1902        index = self._index
1903        if self._match(TokenType.L_PAREN):
1904            columns = self._parse_csv(self._parse_function_parameter)
1905            self._match_r_paren() if columns else self._retreat(index)
1906        else:
1907            columns = None
1908
1909        if not alias and not columns:
1910            return None
1911
1912        return self.expression(exp.TableAlias, this=alias, columns=columns)
1913
1914    def _parse_subquery(
1915        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1916    ) -> exp.Expression:
1917        return self.expression(
1918            exp.Subquery,
1919            this=this,
1920            pivots=self._parse_pivots(),
1921            alias=self._parse_table_alias() if parse_alias else None,
1922        )
1923
1924    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
1925        if not isinstance(this, self.MODIFIABLES):
1926            return
1927
1928        table = isinstance(this, exp.Table)
1929
1930        while True:
1931            join = self._parse_join()
1932            if join:
1933                this.append("joins", join)
1934
1935            lateral = None
1936            if not join:
1937                lateral = self._parse_lateral()
1938                if lateral:
1939                    this.append("laterals", lateral)
1940
1941            comma = None if table else self._match(TokenType.COMMA)
1942            if comma:
1943                this.args["from"].append("expressions", self._parse_table())
1944
1945            if not (lateral or join or comma):
1946                break
1947
1948        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1949            expression = parser(self)
1950
1951            if expression:
1952                this.set(key, expression)
1953
1954    def _parse_hint(self) -> t.Optional[exp.Expression]:
1955        if self._match(TokenType.HINT):
1956            hints = self._parse_csv(self._parse_function)
1957            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1958                self.raise_error("Expected */ after HINT")
1959            return self.expression(exp.Hint, expressions=hints)
1960
1961        return None
1962
1963    def _parse_into(self) -> t.Optional[exp.Expression]:
1964        if not self._match(TokenType.INTO):
1965            return None
1966
1967        temp = self._match(TokenType.TEMPORARY)
1968        unlogged = self._match(TokenType.UNLOGGED)
1969        self._match(TokenType.TABLE)
1970
1971        return self.expression(
1972            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1973        )
1974
1975    def _parse_from(self) -> t.Optional[exp.Expression]:
1976        if not self._match(TokenType.FROM):
1977            return None
1978
1979        return self.expression(
1980            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1981        )
1982
1983    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1984        if not self._match(TokenType.MATCH_RECOGNIZE):
1985            return None
1986
1987        self._match_l_paren()
1988
1989        partition = self._parse_partition_by()
1990        order = self._parse_order()
1991        measures = (
1992            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
1993        )
1994
1995        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1996            rows = exp.Var(this="ONE ROW PER MATCH")
1997        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1998            text = "ALL ROWS PER MATCH"
1999            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2000                text += f" SHOW EMPTY MATCHES"
2001            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2002                text += f" OMIT EMPTY MATCHES"
2003            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2004                text += f" WITH UNMATCHED ROWS"
2005            rows = exp.Var(this=text)
2006        else:
2007            rows = None
2008
2009        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2010            text = "AFTER MATCH SKIP"
2011            if self._match_text_seq("PAST", "LAST", "ROW"):
2012                text += f" PAST LAST ROW"
2013            elif self._match_text_seq("TO", "NEXT", "ROW"):
2014                text += f" TO NEXT ROW"
2015            elif self._match_text_seq("TO", "FIRST"):
2016                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2017            elif self._match_text_seq("TO", "LAST"):
2018                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2019            after = exp.Var(this=text)
2020        else:
2021            after = None
2022
2023        if self._match_text_seq("PATTERN"):
2024            self._match_l_paren()
2025
2026            if not self._curr:
2027                self.raise_error("Expecting )", self._curr)
2028
2029            paren = 1
2030            start = self._curr
2031
2032            while self._curr and paren > 0:
2033                if self._curr.token_type == TokenType.L_PAREN:
2034                    paren += 1
2035                if self._curr.token_type == TokenType.R_PAREN:
2036                    paren -= 1
2037                end = self._prev
2038                self._advance()
2039            if paren > 0:
2040                self.raise_error("Expecting )", self._curr)
2041            pattern = exp.Var(this=self._find_sql(start, end))
2042        else:
2043            pattern = None
2044
2045        define = (
2046            self._parse_csv(
2047                lambda: self.expression(
2048                    exp.Alias,
2049                    alias=self._parse_id_var(any_token=True),
2050                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2051                )
2052            )
2053            if self._match_text_seq("DEFINE")
2054            else None
2055        )
2056
2057        self._match_r_paren()
2058
2059        return self.expression(
2060            exp.MatchRecognize,
2061            partition_by=partition,
2062            order=order,
2063            measures=measures,
2064            rows=rows,
2065            after=after,
2066            pattern=pattern,
2067            define=define,
2068            alias=self._parse_table_alias(),
2069        )
2070
2071    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2072        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2073        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2074
2075        if outer_apply or cross_apply:
2076            this = self._parse_select(table=True)
2077            view = None
2078            outer = not cross_apply
2079        elif self._match(TokenType.LATERAL):
2080            this = self._parse_select(table=True)
2081            view = self._match(TokenType.VIEW)
2082            outer = self._match(TokenType.OUTER)
2083        else:
2084            return None
2085
2086        if not this:
2087            this = self._parse_function() or self._parse_id_var(any_token=False)
2088            while self._match(TokenType.DOT):
2089                this = exp.Dot(
2090                    this=this,
2091                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2092                )
2093
2094        table_alias: t.Optional[exp.Expression]
2095
2096        if view:
2097            table = self._parse_id_var(any_token=False)
2098            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2099            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2100        else:
2101            table_alias = self._parse_table_alias()
2102
2103        expression = self.expression(
2104            exp.Lateral,
2105            this=this,
2106            view=view,
2107            outer=outer,
2108            alias=table_alias,
2109        )
2110
2111        return expression
2112
2113    def _parse_join_side_and_kind(
2114        self,
2115    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2116        return (
2117            self._match(TokenType.NATURAL) and self._prev,
2118            self._match_set(self.JOIN_SIDES) and self._prev,
2119            self._match_set(self.JOIN_KINDS) and self._prev,
2120        )
2121
2122    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
2123        index = self._index
2124        natural, side, kind = self._parse_join_side_and_kind()
2125        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
2126        join = self._match(TokenType.JOIN)
2127
2128        if not skip_join_token and not join:
2129            self._retreat(index)
2130            kind = None
2131            natural = None
2132            side = None
2133
2134        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
2135        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
2136
2137        if not skip_join_token and not join and not outer_apply and not cross_apply:
2138            return None
2139
2140        if outer_apply:
2141            side = Token(TokenType.LEFT, "LEFT")
2142
2143        kwargs: t.Dict[
2144            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
2145        ] = {"this": self._parse_table()}
2146
2147        if natural:
2148            kwargs["natural"] = True
2149        if side:
2150            kwargs["side"] = side.text
2151        if kind:
2152            kwargs["kind"] = kind.text
2153        if hint:
2154            kwargs["hint"] = hint
2155
2156        if self._match(TokenType.ON):
2157            kwargs["on"] = self._parse_conjunction()
2158        elif self._match(TokenType.USING):
2159            kwargs["using"] = self._parse_wrapped_id_vars()
2160
2161        return self.expression(exp.Join, **kwargs)  # type: ignore
2162
2163    def _parse_index(self) -> exp.Expression:
2164        index = self._parse_id_var()
2165        self._match(TokenType.ON)
2166        self._match(TokenType.TABLE)  # hive
2167
2168        return self.expression(
2169            exp.Index,
2170            this=index,
2171            table=self.expression(exp.Table, this=self._parse_id_var()),
2172            columns=self._parse_expression(),
2173        )
2174
2175    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2176        unique = self._match(TokenType.UNIQUE)
2177        primary = self._match_text_seq("PRIMARY")
2178        amp = self._match_text_seq("AMP")
2179        if not self._match(TokenType.INDEX):
2180            return None
2181        index = self._parse_id_var()
2182        columns = None
2183        if self._match(TokenType.L_PAREN, advance=False):
2184            columns = self._parse_wrapped_csv(self._parse_column)
2185        return self.expression(
2186            exp.Index,
2187            this=index,
2188            columns=columns,
2189            unique=unique,
2190            primary=primary,
2191            amp=amp,
2192        )
2193
2194    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2195        catalog = None
2196        db = None
2197
2198        table = (
2199            (not schema and self._parse_function())
2200            or self._parse_id_var(any_token=False)
2201            or self._parse_string_as_identifier()
2202        )
2203
2204        while self._match(TokenType.DOT):
2205            if catalog:
2206                # This allows nesting the table in arbitrarily many dot expressions if needed
2207                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
2208            else:
2209                catalog = db
2210                db = table
2211                table = self._parse_id_var()
2212
2213        if not table:
2214            self.raise_error(f"Expected table name but got {self._curr}")
2215
2216        return self.expression(
2217            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2218        )
2219
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: a lateral/unnest/VALUES/subquery, or a plain
        table reference with optional alias, pivots, hints and TABLESAMPLE.

        Args:
            schema: when True, parse the trailing parens as a column schema.
            alias_tokens: token types allowed as alias names (defaults to
                TABLE_ALIAS_TOKENS).
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether the alias comes before or after
        # TABLESAMPLE; exactly one of these two branches assigns table_sample.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (...) table hints, e.g. T-SQL's WITH (NOLOCK).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2276
2277    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2278        if not self._match(TokenType.UNNEST):
2279            return None
2280
2281        expressions = self._parse_wrapped_csv(self._parse_column)
2282        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2283        alias = self._parse_table_alias()
2284
2285        if alias and self.unnest_column_only:
2286            if alias.args.get("columns"):
2287                self.raise_error("Unexpected extra column alias in unnest.")
2288            alias.set("columns", [alias.this])
2289            alias.set("this", None)
2290
2291        offset = None
2292        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2293            self._match(TokenType.ALIAS)
2294            offset = self._parse_id_var() or exp.Identifier(this="offset")
2295
2296        return self.expression(
2297            exp.Unnest,
2298            expressions=expressions,
2299            ordinality=ordinality,
2300            alias=alias,
2301            offset=offset,
2302        )
2303
2304    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2305        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2306        if not is_derived and not self._match(TokenType.VALUES):
2307            return None
2308
2309        expressions = self._parse_csv(self._parse_value)
2310
2311        if is_derived:
2312            self._match_r_paren()
2313
2314        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2315
2316    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2317        if not self._match(TokenType.TABLE_SAMPLE) and not (
2318            as_modifier and self._match_text_seq("USING", "SAMPLE")
2319        ):
2320            return None
2321
2322        bucket_numerator = None
2323        bucket_denominator = None
2324        bucket_field = None
2325        percent = None
2326        rows = None
2327        size = None
2328        seed = None
2329
2330        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2331        method = self._parse_var(tokens=(TokenType.ROW,))
2332
2333        self._match(TokenType.L_PAREN)
2334
2335        num = self._parse_number()
2336
2337        if self._match(TokenType.BUCKET):
2338            bucket_numerator = self._parse_number()
2339            self._match(TokenType.OUT_OF)
2340            bucket_denominator = bucket_denominator = self._parse_number()
2341            self._match(TokenType.ON)
2342            bucket_field = self._parse_field()
2343        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2344            percent = num
2345        elif self._match(TokenType.ROWS):
2346            rows = num
2347        else:
2348            size = num
2349
2350        self._match(TokenType.R_PAREN)
2351
2352        if self._match(TokenType.L_PAREN):
2353            method = self._parse_var()
2354            seed = self._match(TokenType.COMMA) and self._parse_number()
2355            self._match_r_paren()
2356        elif self._match_texts(("SEED", "REPEATABLE")):
2357            seed = self._parse_wrapped(self._parse_number)
2358
2359        return self.expression(
2360            exp.TableSample,
2361            method=method,
2362            bucket_numerator=bucket_numerator,
2363            bucket_denominator=bucket_denominator,
2364            bucket_field=bucket_field,
2365            percent=percent,
2366            rows=rows,
2367            size=size,
2368            seed=seed,
2369            kind=kind,
2370        )
2371
2372    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2373        return list(iter(self._parse_pivot, None))
2374
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT or UNPIVOT clause into exp.Pivot, or return None.

        For PIVOT, also synthesizes the generated output column names
        ("columns" arg) by combining each aggregation alias with each IN-list
        value according to the dialect's naming scheme.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword was something else (e.g. an identifier): backtrack.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # An alias may only follow the last PIVOT/UNPIVOT in a chain.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for col in pivot.args["field"].expressions:
                for name in names:
                    # Dialects differ on whether the aggregation alias prefixes
                    # or suffixes the pivoted value in the generated name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name
                    else:
                        name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name

                    columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS))

            pivot.set("columns", columns)

        return pivot
2433
2434    def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]:
2435        return [agg.alias for agg in pivot_columns]
2436
2437    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2438        if not skip_where_token and not self._match(TokenType.WHERE):
2439            return None
2440
2441        return self.expression(
2442            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2443        )
2444
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, collecting plain expressions, GROUPING SETS,
        ROLLUP and CUBE items (in any order, possibly repeated).
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each category of grouping element across loop iterations.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE take no column list; in that case the
            # stored element is the bare truthy result of matching WITH.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once an iteration consumed nothing.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2476
2477    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2478        if not self._match(TokenType.GROUPING_SETS):
2479            return None
2480
2481        return self._parse_wrapped_csv(self._parse_grouping_set)
2482
2483    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2484        if self._match(TokenType.L_PAREN):
2485            grouping_set = self._parse_csv(self._parse_column)
2486            self._match_r_paren()
2487            return self.expression(exp.Tuple, expressions=grouping_set)
2488
2489        return self._parse_column()
2490
2491    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2492        if not skip_having_token and not self._match(TokenType.HAVING):
2493            return None
2494        return self.expression(exp.Having, this=self._parse_conjunction())
2495
2496    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2497        if not self._match(TokenType.QUALIFY):
2498            return None
2499        return self.expression(exp.Qualify, this=self._parse_conjunction())
2500
2501    def _parse_order(
2502        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2503    ) -> t.Optional[exp.Expression]:
2504        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2505            return this
2506
2507        return self.expression(
2508            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2509        )
2510
2511    def _parse_sort(
2512        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2513    ) -> t.Optional[exp.Expression]:
2514        if not self._match(token_type):
2515            return None
2516        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2517
2518    def _parse_ordered(self) -> exp.Expression:
2519        this = self._parse_conjunction()
2520        self._match(TokenType.ASC)
2521        is_desc = self._match(TokenType.DESC)
2522        is_nulls_first = self._match(TokenType.NULLS_FIRST)
2523        is_nulls_last = self._match(TokenType.NULLS_LAST)
2524        desc = is_desc or False
2525        asc = not desc
2526        nulls_first = is_nulls_first or False
2527        explicitly_null_ordered = is_nulls_first or is_nulls_last
2528        if (
2529            not explicitly_null_ordered
2530            and (
2531                (asc and self.null_ordering == "nulls_are_small")
2532                or (desc and self.null_ordering != "nulls_are_small")
2533            )
2534            and self.null_ordering != "nulls_are_last"
2535        ):
2536            nulls_first = True
2537
2538        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2539
2540    def _parse_limit(
2541        self, this: t.Optional[exp.Expression] = None, top: bool = False
2542    ) -> t.Optional[exp.Expression]:
2543        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2544            limit_paren = self._match(TokenType.L_PAREN)
2545            limit_exp = self.expression(
2546                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2547            )
2548
2549            if limit_paren:
2550                self._match_r_paren()
2551
2552            return limit_exp
2553
2554        if self._match(TokenType.FETCH):
2555            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2556            direction = self._prev.text if direction else "FIRST"
2557
2558            count = self._parse_number()
2559            percent = self._match(TokenType.PERCENT)
2560
2561            self._match_set((TokenType.ROW, TokenType.ROWS))
2562
2563            only = self._match(TokenType.ONLY)
2564            with_ties = self._match_text_seq("WITH", "TIES")
2565
2566            if only and with_ties:
2567                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2568
2569            return self.expression(
2570                exp.Fetch,
2571                direction=direction,
2572                count=count,
2573                percent=percent,
2574                with_ties=with_ties,
2575            )
2576
2577        return this
2578
2579    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2580        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2581            return this
2582
2583        count = self._parse_number()
2584        self._match_set((TokenType.ROW, TokenType.ROWS))
2585        return self.expression(exp.Offset, this=this, expression=count)
2586
2587    def _parse_lock(self) -> t.Optional[exp.Expression]:
2588        if self._match_text_seq("FOR", "UPDATE"):
2589            return self.expression(exp.Lock, update=True)
2590        if self._match_text_seq("FOR", "SHARE"):
2591            return self.expression(exp.Lock, update=False)
2592
2593        return None
2594
2595    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2596        if not self._match_set(self.SET_OPERATIONS):
2597            return this
2598
2599        token_type = self._prev.token_type
2600
2601        if token_type == TokenType.UNION:
2602            expression = exp.Union
2603        elif token_type == TokenType.EXCEPT:
2604            expression = exp.Except
2605        else:
2606            expression = exp.Intersect
2607
2608        return self.expression(
2609            expression,
2610            this=this,
2611            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2612            expression=self._parse_set_operations(self._parse_select(nested=True)),
2613        )
2614
2615    def _parse_expression(self) -> t.Optional[exp.Expression]:
2616        return self._parse_alias(self._parse_conjunction())
2617
2618    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2619        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2620
2621    def _parse_equality(self) -> t.Optional[exp.Expression]:
2622        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2623
2624    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2625        return self._parse_tokens(self._parse_range, self.COMPARISON)
2626
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: BETWEEN, IN, LIKE, ISNULL/NOTNULL,
        and IS, including the NOT-prefixed forms (e.g. NOT IN).
        """
        this = self._parse_bitwise()
        # A leading NOT here belongs to the range predicate (NOT BETWEEN, NOT IN).
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dialect's range parser declined; keep the plain operand.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the deferred NOT after the range predicate was built.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2653
2654    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2655        index = self._index - 1
2656        negate = self._match(TokenType.NOT)
2657        if self._match(TokenType.DISTINCT_FROM):
2658            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2659            return self.expression(klass, this=this, expression=self._parse_expression())
2660
2661        expression = self._parse_null() or self._parse_boolean()
2662        if not expression:
2663            self._retreat(index)
2664            return None
2665
2666        this = self.expression(exp.Is, this=this, expression=expression)
2667        return self.expression(exp.Not, this=this) if negate else this
2668
2669    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2670        unnest = self._parse_unnest()
2671        if unnest:
2672            this = self.expression(exp.In, this=this, unnest=unnest)
2673        elif self._match(TokenType.L_PAREN):
2674            expressions = self._parse_csv(self._parse_select_or_expression)
2675
2676            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2677                this = self.expression(exp.In, this=this, query=expressions[0])
2678            else:
2679                this = self.expression(exp.In, this=this, expressions=expressions)
2680
2681            self._match_r_paren()
2682        else:
2683            this = self.expression(exp.In, this=this, field=self._parse_field())
2684
2685        return this
2686
2687    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2688        low = self._parse_bitwise()
2689        self._match(TokenType.AND)
2690        high = self._parse_bitwise()
2691        return self.expression(exp.Between, this=this, low=low, high=high)
2692
2693    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2694        if not self._match(TokenType.ESCAPE):
2695            return this
2696        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2697
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal into exp.Interval, normalizing toward the
        canonical ``INTERVAL '<value>' <unit>`` form where possible.
        """
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                # Numeric values are canonicalized to string literals.
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2718
2719    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2720        this = self._parse_term()
2721
2722        while True:
2723            if self._match_set(self.BITWISE):
2724                this = self.expression(
2725                    self.BITWISE[self._prev.token_type],
2726                    this=this,
2727                    expression=self._parse_term(),
2728                )
2729            elif self._match_pair(TokenType.LT, TokenType.LT):
2730                this = self.expression(
2731                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2732                )
2733            elif self._match_pair(TokenType.GT, TokenType.GT):
2734                this = self.expression(
2735                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2736                )
2737            else:
2738                break
2739
2740        return this
2741
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse a factor followed by any operators registered in ``self.TERM``."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2744
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse a unary expression followed by any operators registered in ``self.FACTOR``."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2747
2748    def _parse_unary(self) -> t.Optional[exp.Expression]:
2749        if self._match_set(self.UNARY_PARSERS):
2750            return self.UNARY_PARSERS[self._prev.token_type](self)
2751        return self._parse_at_time_zone(self._parse_type())
2752
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a ``<type> <literal>`` construct, a bare data
        type, or fall back to a column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01': use a type-specific literal parser
                # when one is registered, otherwise fall back to a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.args.get("expressions"):
                # A bare type name followed by something that isn't a literal
                # was likely an identifier — rewind and re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2774
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, including parameterized and nested types,
        bracket array suffixes, timestamp time-zone modifiers and INTERVAL.

        When ``check_func`` is True, a parenthesized type that could also be a
        function call (e.g. ``DATE(x)``) is only accepted as a type if a string
        literal follows; otherwise the parser rewinds and returns None.
        """
        index = self._index

        # Optional "SYSUDTLIB." qualifier before the type name; recorded on
        # the resulting DataType node as ``prefix``.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False  # could this still be a function call rather than a type?

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a valid parameterized type — rewind.
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # Bracket suffixes like INT[] or INT[][] build nested ARRAY types.
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this was a bracket expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing value list in brackets or parens.
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants into concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone modifier rules out a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so this was a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2888
2889    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2890        index = self._index
2891        this = self._parse_id_var()
2892        self._match(TokenType.COLON)
2893        data_type = self._parse_types()
2894
2895        if not data_type:
2896            self._retreat(index)
2897            return self._parse_types()
2898        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2899
2900    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2901        if not self._match(TokenType.AT_TIME_ZONE):
2902            return this
2903        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2904
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including ``::``
        casts, column operators, bracket subscripts and dot-chained fields."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::" is followed by a type (cast-style operator).
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take the next token as a literal key.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers down one level: what was parsed as the
                # column becomes the table, the table becomes the db, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2953
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string
        concatenation), a leading-dot decimal, or a parenthesized expression,
        tuple, or subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimals, e.g. ".5" -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            self._match_r_paren()
            # Carry comments from both the opening and closing paren tokens.
            comments.extend(self._prev_comments)

            if this and comments:
                this.comments = comments

            return this

        return None
3002
3003    def _parse_field(
3004        self,
3005        any_token: bool = False,
3006        tokens: t.Optional[t.Collection[TokenType]] = None,
3007    ) -> t.Optional[exp.Expression]:
3008        return (
3009            self._parse_primary()
3010            or self._parse_function()
3011            or self._parse_id_var(any_token=any_token, tokens=tokens)
3012        )
3013
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (with or without parentheses), dispatching to
        registered parsers, known function builders, or ``exp.Anonymous``.

        Args:
            functions: optional name -> builder overrides; defaults to
                ``self.FUNCTIONS``.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only paren-less builtins are possible here.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # Subquery predicate, e.g. EXISTS (SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3075
3076    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3077        return self._parse_column_def(self._parse_id_var())
3078
3079    def _parse_user_defined_function(
3080        self, kind: t.Optional[TokenType] = None
3081    ) -> t.Optional[exp.Expression]:
3082        this = self._parse_id_var()
3083
3084        while self._match(TokenType.DOT):
3085            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3086
3087        if not self._match(TokenType.L_PAREN):
3088            return this
3089
3090        expressions = self._parse_csv(self._parse_function_parameter)
3091        self._match_r_paren()
3092        return self.expression(
3093            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3094        )
3095
3096    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3097        literal = self._parse_primary()
3098        if literal:
3099            return self.expression(exp.Introducer, this=token.text, expression=literal)
3100
3101        return self.expression(exp.Identifier, this=token.text)
3102
3103    def _parse_national(self, token: Token) -> exp.Expression:
3104        return self.expression(exp.National, this=exp.Literal.string(token.text))
3105
3106    def _parse_session_parameter(self) -> exp.Expression:
3107        kind = None
3108        this = self._parse_id_var() or self._parse_primary()
3109
3110        if this and self._match(TokenType.DOT):
3111            kind = this.name
3112            this = self._parse_var() or self._parse_primary()
3113
3114        return self.expression(exp.SessionParameter, this=this, kind=kind)
3115
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (``x -> ...`` or ``(x, y) -> ...``); when no lambda
        follows, rewind and parse a DISTINCT list or a regular expression with
        optional IGNORE/RESPECT NULLS, ORDER BY and LIMIT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a normal argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Treat the left side of a "name = value" argument as a
                    # variable rather than a column reference.
                    left.replace(exp.Var(this=left.text("this")))

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
3152
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints)
        attached to *this*; a parenthesized SELECT is left for other rules."""
        index = self._index

        try:
            # Probe: if a (nested) SELECT parses here, this is not a schema.
            if self._parse_select(nested=True):
                return this
        except Exception:
            # Best-effort probe — any parse failure just means "not a SELECT".
            pass
        finally:
            # Always rewind so the probe never consumes tokens.
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3173
3174    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3175        kind = self._parse_types()
3176
3177        if self._match_text_seq("FOR", "ORDINALITY"):
3178            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3179
3180        constraints = []
3181        while True:
3182            constraint = self._parse_column_constraint()
3183            if not constraint:
3184                break
3185            constraints.append(constraint)
3186
3187        if not kind and not constraints:
3188            return this
3189
3190        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3191
3192    def _parse_auto_increment(self) -> exp.Expression:
3193        start = None
3194        increment = None
3195
3196        if self._match(TokenType.L_PAREN, advance=False):
3197            args = self._parse_wrapped_csv(self._parse_bitwise)
3198            start = seq_get(args, 0)
3199            increment = seq_get(args, 1)
3200        elif self._match_text_seq("START"):
3201            start = self._parse_bitwise()
3202            self._match_text_seq("INCREMENT")
3203            increment = self._parse_bitwise()
3204
3205        if start and increment:
3206            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3207
3208        return exp.AutoIncrementColumnConstraint()
3209
3210    def _parse_compress(self) -> exp.Expression:
3211        if self._match(TokenType.L_PAREN, advance=False):
3212            return self.expression(
3213                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3214            )
3215
3216        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3217
3218    def _parse_generated_as_identity(self) -> exp.Expression:
3219        if self._match(TokenType.BY_DEFAULT):
3220            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
3221        else:
3222            self._match_text_seq("ALWAYS")
3223            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
3224
3225        self._match_text_seq("AS", "IDENTITY")
3226        if self._match(TokenType.L_PAREN):
3227            if self._match_text_seq("START", "WITH"):
3228                this.set("start", self._parse_bitwise())
3229            if self._match_text_seq("INCREMENT", "BY"):
3230                this.set("increment", self._parse_bitwise())
3231            if self._match_text_seq("MINVALUE"):
3232                this.set("minvalue", self._parse_bitwise())
3233            if self._match_text_seq("MAXVALUE"):
3234                this.set("maxvalue", self._parse_bitwise())
3235
3236            if self._match_text_seq("CYCLE"):
3237                this.set("cycle", True)
3238            elif self._match_text_seq("NO", "CYCLE"):
3239                this.set("cycle", False)
3240
3241            self._match_r_paren()
3242
3243        return this
3244
3245    def _parse_inline(self) -> t.Optional[exp.Expression]:
3246        self._match_text_seq("LENGTH")
3247        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3248
3249    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3250        if self._match_text_seq("NULL"):
3251            return self.expression(exp.NotNullColumnConstraint)
3252        if self._match_text_seq("CASESPECIFIC"):
3253            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3254        return None
3255
3256    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3257        if self._match(TokenType.CONSTRAINT):
3258            this = self._parse_id_var()
3259        else:
3260            this = None
3261
3262        if self._match_texts(self.CONSTRAINT_PARSERS):
3263            return self.expression(
3264                exp.ColumnConstraint,
3265                this=this,
3266                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3267            )
3268
3269        return this
3270
3271    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3272        if not self._match(TokenType.CONSTRAINT):
3273            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3274
3275        this = self._parse_id_var()
3276        expressions = []
3277
3278        while True:
3279            constraint = self._parse_unnamed_constraint() or self._parse_function()
3280            if not constraint:
3281                break
3282            expressions.append(constraint)
3283
3284        return self.expression(exp.Constraint, this=this, expressions=expressions)
3285
3286    def _parse_unnamed_constraint(
3287        self, constraints: t.Optional[t.Collection[str]] = None
3288    ) -> t.Optional[exp.Expression]:
3289        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3290            return None
3291
3292        constraint = self._prev.text.upper()
3293        if constraint not in self.CONSTRAINT_PARSERS:
3294            self.raise_error(f"No parser found for schema constraint {constraint}.")
3295
3296        return self.CONSTRAINT_PARSERS[constraint](self)
3297
3298    def _parse_unique(self) -> exp.Expression:
3299        if not self._match(TokenType.L_PAREN, advance=False):
3300            return self.expression(exp.UniqueColumnConstraint)
3301        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3302
3303    def _parse_key_constraint_options(self) -> t.List[str]:
3304        options = []
3305        while True:
3306            if not self._curr:
3307                break
3308
3309            if self._match(TokenType.ON):
3310                action = None
3311                on = self._advance_any() and self._prev.text
3312
3313                if self._match(TokenType.NO_ACTION):
3314                    action = "NO ACTION"
3315                elif self._match(TokenType.CASCADE):
3316                    action = "CASCADE"
3317                elif self._match_pair(TokenType.SET, TokenType.NULL):
3318                    action = "SET NULL"
3319                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3320                    action = "SET DEFAULT"
3321                else:
3322                    self.raise_error("Invalid key constraint")
3323
3324                options.append(f"ON {on} {action}")
3325            elif self._match_text_seq("NOT", "ENFORCED"):
3326                options.append("NOT ENFORCED")
3327            elif self._match_text_seq("DEFERRABLE"):
3328                options.append("DEFERRABLE")
3329            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3330                options.append("INITIALLY DEFERRED")
3331            elif self._match_text_seq("NORELY"):
3332                options.append("NORELY")
3333            elif self._match_text_seq("MATCH", "FULL"):
3334                options.append("MATCH FULL")
3335            else:
3336                break
3337
3338        return options
3339
3340    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3341        if match and not self._match(TokenType.REFERENCES):
3342            return None
3343
3344        expressions = None
3345        this = self._parse_id_var()
3346
3347        if self._match(TokenType.L_PAREN, advance=False):
3348            expressions = self._parse_wrapped_id_vars()
3349
3350        options = self._parse_key_constraint_options()
3351        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3352
3353    def _parse_foreign_key(self) -> exp.Expression:
3354        expressions = self._parse_wrapped_id_vars()
3355        reference = self._parse_references()
3356        options = {}
3357
3358        while self._match(TokenType.ON):
3359            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3360                self.raise_error("Expected DELETE or UPDATE")
3361
3362            kind = self._prev.text.lower()
3363
3364            if self._match(TokenType.NO_ACTION):
3365                action = "NO ACTION"
3366            elif self._match(TokenType.SET):
3367                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3368                action = "SET " + self._prev.text.upper()
3369            else:
3370                self._advance()
3371                action = self._prev.text.upper()
3372
3373            options[kind] = action
3374
3375        return self.expression(
3376            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3377        )
3378
3379    def _parse_primary_key(self) -> exp.Expression:
3380        desc = (
3381            self._match_set((TokenType.ASC, TokenType.DESC))
3382            and self._prev.token_type == TokenType.DESC
3383        )
3384
3385        if not self._match(TokenType.L_PAREN, advance=False):
3386            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3387
3388        expressions = self._parse_wrapped_id_vars()
3389        options = self._parse_key_constraint_options()
3390        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3391
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literals, ``{...}`` structs, slices
        and index subscripts (with the dialect's index offset applied)."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # "[:x]" — a slice with no lower bound.
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # A subscript: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Brackets can chain, e.g. x[0][1].
        return self._parse_bracket(this)
3420
3421    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3422        if self._match(TokenType.COLON):
3423            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3424        return this
3425
3426    def _parse_case(self) -> t.Optional[exp.Expression]:
3427        ifs = []
3428        default = None
3429
3430        expression = self._parse_conjunction()
3431
3432        while self._match(TokenType.WHEN):
3433            this = self._parse_conjunction()
3434            self._match(TokenType.THEN)
3435            then = self._parse_conjunction()
3436            ifs.append(self.expression(exp.If, this=this, true=then))
3437
3438        if self._match(TokenType.ELSE):
3439            default = self._parse_conjunction()
3440
3441        if not self._match(TokenType.END):
3442            self.raise_error("Expected END after CASE", self._prev)
3443
3444        return self._parse_window(
3445            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3446        )
3447
3448    def _parse_if(self) -> t.Optional[exp.Expression]:
3449        if self._match(TokenType.L_PAREN):
3450            args = self._parse_csv(self._parse_conjunction)
3451            this = exp.If.from_arg_list(args)
3452            self.validate_expression(this, args)
3453            self._match_r_paren()
3454        else:
3455            condition = self._parse_conjunction()
3456            self._match(TokenType.THEN)
3457            true = self._parse_conjunction()
3458            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3459            self._match(TokenType.END)
3460            this = self.expression(exp.If, this=condition, true=true, false=false)
3461
3462        return self._parse_window(this)
3463
3464    def _parse_extract(self) -> exp.Expression:
3465        this = self._parse_function() or self._parse_var() or self._parse_type()
3466
3467        if self._match(TokenType.FROM):
3468            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3469
3470        if not self._match(TokenType.COMMA):
3471            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3472
3473        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3474
3475    def _parse_cast(self, strict: bool) -> exp.Expression:
3476        this = self._parse_conjunction()
3477
3478        if not self._match(TokenType.ALIAS):
3479            self.raise_error("Expected AS after CAST")
3480
3481        to = self._parse_types()
3482
3483        if not to:
3484            self.raise_error("Expected TYPE after CAST")
3485        elif to.this == exp.DataType.Type.CHAR:
3486            if self._match(TokenType.CHARACTER_SET):
3487                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3488
3489        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3490
    def _parse_string_agg(self) -> exp.Expression:
        """Parse the arguments of STRING_AGG / GROUP_CONCAT style aggregates.

        Handles three shapes:
        - STRING_AGG([DISTINCT] expr, sep) with the closing paren consumed here
        - Postgres' trailing ORDER BY inside the call
        - SQL-standard WITHIN GROUP (ORDER BY ...) after the closing paren
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index  # remember the position so we can backtrack below
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3519
3520    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3521        to: t.Optional[exp.Expression]
3522        this = self._parse_bitwise()
3523
3524        if self._match(TokenType.USING):
3525            to = self.expression(exp.CharacterSet, this=self._parse_var())
3526        elif self._match(TokenType.COMMA):
3527            to = self._parse_bitwise()
3528        else:
3529            to = None
3530
3531        # Swap the argument order if needed to produce the correct AST
3532        if self.CONVERT_TYPE_FIRST:
3533            this, to = to, this
3534
3535        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3536
3537    def _parse_decode(self) -> t.Optional[exp.Expression]:
3538        """
3539        There are generally two variants of the DECODE function:
3540
3541        - DECODE(bin, charset)
3542        - DECODE(expression, search, result [, search, result] ... [, default])
3543
3544        The second variant will always be parsed into a CASE expression. Note that NULL
3545        needs special treatment, since we need to explicitly check for it with `IS NULL`,
3546        instead of relying on pattern matching.
3547        """
3548        args = self._parse_csv(self._parse_conjunction)
3549
3550        if len(args) < 3:
3551            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
3552
3553        expression, *expressions = args
3554        if not expression:
3555            return None
3556
3557        ifs = []
3558        for search, result in zip(expressions[::2], expressions[1::2]):
3559            if not search or not result:
3560                return None
3561
3562            if isinstance(search, exp.Literal):
3563                ifs.append(
3564                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
3565                )
3566            elif isinstance(search, exp.Null):
3567                ifs.append(
3568                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
3569                )
3570            else:
3571                cond = exp.or_(
3572                    exp.EQ(this=expression.copy(), expression=search),
3573                    exp.and_(
3574                        exp.Is(this=expression.copy(), expression=exp.Null()),
3575                        exp.Is(this=search.copy(), expression=exp.Null()),
3576                    ),
3577                )
3578                ifs.append(exp.If(this=cond, true=result))
3579
3580        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3581
3582    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3583        self._match_text_seq("KEY")
3584        key = self._parse_field()
3585        self._match(TokenType.COLON)
3586        self._match_text_seq("VALUE")
3587        value = self._parse_field()
3588        if not key and not value:
3589            return None
3590        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3591
3592    def _parse_json_object(self) -> exp.Expression:
3593        expressions = self._parse_csv(self._parse_json_key_value)
3594
3595        null_handling = None
3596        if self._match_text_seq("NULL", "ON", "NULL"):
3597            null_handling = "NULL ON NULL"
3598        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3599            null_handling = "ABSENT ON NULL"
3600
3601        unique_keys = None
3602        if self._match_text_seq("WITH", "UNIQUE"):
3603            unique_keys = True
3604        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3605            unique_keys = False
3606
3607        self._match_text_seq("KEYS")
3608
3609        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3610        format_json = self._match_text_seq("FORMAT", "JSON")
3611        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3612
3613        return self.expression(
3614            exp.JSONObject,
3615            expressions=expressions,
3616            null_handling=null_handling,
3617            unique_keys=unique_keys,
3618            return_type=return_type,
3619            format_json=format_json,
3620            encoding=encoding,
3621        )
3622
3623    def _parse_logarithm(self) -> exp.Expression:
3624        # Default argument order is base, expression
3625        args = self._parse_csv(self._parse_range)
3626
3627        if len(args) > 1:
3628            if not self.LOG_BASE_FIRST:
3629                args.reverse()
3630            return exp.Log.from_arg_list(args)
3631
3632        return self.expression(
3633            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3634        )
3635
3636    def _parse_match_against(self) -> exp.Expression:
3637        expressions = self._parse_csv(self._parse_column)
3638
3639        self._match_text_seq(")", "AGAINST", "(")
3640
3641        this = self._parse_string()
3642
3643        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3644            modifier = "IN NATURAL LANGUAGE MODE"
3645            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3646                modifier = f"{modifier} WITH QUERY EXPANSION"
3647        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3648            modifier = "IN BOOLEAN MODE"
3649        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3650            modifier = "WITH QUERY EXPANSION"
3651        else:
3652            modifier = None
3653
3654        return self.expression(
3655            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3656        )
3657
3658    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3659        args = self._parse_csv(self._parse_bitwise)
3660
3661        if self._match(TokenType.IN):
3662            return self.expression(
3663                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3664            )
3665
3666        if haystack_first:
3667            haystack = seq_get(args, 0)
3668            needle = seq_get(args, 1)
3669        else:
3670            needle = seq_get(args, 0)
3671            haystack = seq_get(args, 1)
3672
3673        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3674
3675        self.validate_expression(this, args)
3676
3677        return this
3678
3679    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3680        args = self._parse_csv(self._parse_table)
3681        return exp.JoinHint(this=func_name.upper(), expressions=args)
3682
3683    def _parse_substring(self) -> exp.Expression:
3684        # Postgres supports the form: substring(string [from int] [for int])
3685        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3686
3687        args = self._parse_csv(self._parse_bitwise)
3688
3689        if self._match(TokenType.FROM):
3690            args.append(self._parse_bitwise())
3691            if self._match(TokenType.FOR):
3692                args.append(self._parse_bitwise())
3693
3694        this = exp.Substring.from_arg_list(args)
3695        self.validate_expression(this, args)
3696
3697        return this
3698
3699    def _parse_trim(self) -> exp.Expression:
3700        # https://www.w3resource.com/sql/character-functions/trim.php
3701        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3702
3703        position = None
3704        collation = None
3705
3706        if self._match_set(self.TRIM_TYPES):
3707            position = self._prev.text.upper()
3708
3709        expression = self._parse_term()
3710        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3711            this = self._parse_term()
3712        else:
3713            this = expression
3714            expression = None
3715
3716        if self._match(TokenType.COLLATE):
3717            collation = self._parse_term()
3718
3719        return self.expression(
3720            exp.Trim,
3721            this=this,
3722            position=position,
3723            expression=expression,
3724            collation=collation,
3725        )
3726
3727    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3728        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3729
3730    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3731        return self._parse_window(self._parse_id_var(), alias=True)
3732
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of *this*: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER clause (or a named WINDOW definition
        when *alias* is True). Returns *this* unchanged if no window syntax follows.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            # No OVER clause: there is no window attached to this expression.
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a reference to a named window, not an inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3806
3807    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3808        self._match(TokenType.BETWEEN)
3809
3810        return {
3811            "value": (
3812                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3813            )
3814            or self._parse_bitwise(),
3815            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3816        }
3817
3818    def _parse_alias(
3819        self, this: t.Optional[exp.Expression], explicit: bool = False
3820    ) -> t.Optional[exp.Expression]:
3821        any_token = self._match(TokenType.ALIAS)
3822
3823        if explicit and not any_token:
3824            return this
3825
3826        if self._match(TokenType.L_PAREN):
3827            aliases = self.expression(
3828                exp.Aliases,
3829                this=this,
3830                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3831            )
3832            self._match_r_paren(aliases)
3833            return aliases
3834
3835        alias = self._parse_id_var(any_token)
3836
3837        if alias:
3838            return self.expression(exp.Alias, this=this, alias=alias)
3839
3840        return this
3841
3842    def _parse_id_var(
3843        self,
3844        any_token: bool = True,
3845        tokens: t.Optional[t.Collection[TokenType]] = None,
3846        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3847    ) -> t.Optional[exp.Expression]:
3848        identifier = self._parse_identifier()
3849
3850        if identifier:
3851            return identifier
3852
3853        prefix = ""
3854
3855        if prefix_tokens:
3856            while self._match_set(prefix_tokens):
3857                prefix += self._prev.text
3858
3859        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3860            quoted = self._prev.token_type == TokenType.STRING
3861            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3862
3863        return None
3864
3865    def _parse_string(self) -> t.Optional[exp.Expression]:
3866        if self._match(TokenType.STRING):
3867            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3868        return self._parse_placeholder()
3869
3870    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3871        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3872
3873    def _parse_number(self) -> t.Optional[exp.Expression]:
3874        if self._match(TokenType.NUMBER):
3875            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3876        return self._parse_placeholder()
3877
3878    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3879        if self._match(TokenType.IDENTIFIER):
3880            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3881        return self._parse_placeholder()
3882
3883    def _parse_var(
3884        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3885    ) -> t.Optional[exp.Expression]:
3886        if (
3887            (any_token and self._advance_any())
3888            or self._match(TokenType.VAR)
3889            or (self._match_set(tokens) if tokens else False)
3890        ):
3891            return self.expression(exp.Var, this=self._prev.text)
3892        return self._parse_placeholder()
3893
3894    def _advance_any(self) -> t.Optional[Token]:
3895        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3896            self._advance()
3897            return self._prev
3898        return None
3899
3900    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3901        return self._parse_var() or self._parse_string()
3902
3903    def _parse_null(self) -> t.Optional[exp.Expression]:
3904        if self._match(TokenType.NULL):
3905            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3906        return None
3907
3908    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3909        if self._match(TokenType.TRUE):
3910            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3911        if self._match(TokenType.FALSE):
3912            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3913        return None
3914
3915    def _parse_star(self) -> t.Optional[exp.Expression]:
3916        if self._match(TokenType.STAR):
3917            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3918        return None
3919
3920    def _parse_parameter(self) -> exp.Expression:
3921        wrapped = self._match(TokenType.L_BRACE)
3922        this = self._parse_var() or self._parse_primary()
3923        self._match(TokenType.R_BRACE)
3924        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3925
3926    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3927        if self._match_set(self.PLACEHOLDER_PARSERS):
3928            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3929            if placeholder:
3930                return placeholder
3931            self._advance(-1)
3932        return None
3933
3934    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3935        if not self._match(TokenType.EXCEPT):
3936            return None
3937        if self._match(TokenType.L_PAREN, advance=False):
3938            return self._parse_wrapped_csv(self._parse_column)
3939        return self._parse_csv(self._parse_column)
3940
3941    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3942        if not self._match(TokenType.REPLACE):
3943            return None
3944        if self._match(TokenType.L_PAREN, advance=False):
3945            return self._parse_wrapped_csv(self._parse_expression)
3946        return self._parse_csv(self._parse_expression)
3947
3948    def _parse_csv(
3949        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3950    ) -> t.List[t.Optional[exp.Expression]]:
3951        parse_result = parse_method()
3952        items = [parse_result] if parse_result is not None else []
3953
3954        while self._match(sep):
3955            if parse_result and self._prev_comments:
3956                parse_result.comments = self._prev_comments
3957
3958            parse_result = parse_method()
3959            if parse_result is not None:
3960                items.append(parse_result)
3961
3962        return items
3963
3964    def _parse_tokens(
3965        self, parse_method: t.Callable, expressions: t.Dict
3966    ) -> t.Optional[exp.Expression]:
3967        this = parse_method()
3968
3969        while self._match_set(expressions):
3970            this = self.expression(
3971                expressions[self._prev.token_type],
3972                this=this,
3973                comments=self._prev_comments,
3974                expression=parse_method(),
3975            )
3976
3977        return this
3978
3979    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3980        return self._parse_wrapped_csv(self._parse_id_var)
3981
3982    def _parse_wrapped_csv(
3983        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3984    ) -> t.List[t.Optional[exp.Expression]]:
3985        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3986
3987    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3988        self._match_l_paren()
3989        parse_result = parse_method()
3990        self._match_r_paren()
3991        return parse_result
3992
3993    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3994        return self._parse_select() or self._parse_set_operations(self._parse_expression())
3995
3996    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3997        return self._parse_set_operations(
3998            self._parse_select(nested=True, parse_subquery_alias=False)
3999        )
4000
4001    def _parse_transaction(self) -> exp.Expression:
4002        this = None
4003        if self._match_texts(self.TRANSACTION_KIND):
4004            this = self._prev.text
4005
4006        self._match_texts({"TRANSACTION", "WORK"})
4007
4008        modes = []
4009        while True:
4010            mode = []
4011            while self._match(TokenType.VAR):
4012                mode.append(self._prev.text)
4013
4014            if mode:
4015                modes.append(" ".join(mode))
4016            if not self._match(TokenType.COMMA):
4017                break
4018
4019        return self.expression(exp.Transaction, this=this, modes=modes)
4020
4021    def _parse_commit_or_rollback(self) -> exp.Expression:
4022        chain = None
4023        savepoint = None
4024        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4025
4026        self._match_texts({"TRANSACTION", "WORK"})
4027
4028        if self._match_text_seq("TO"):
4029            self._match_text_seq("SAVEPOINT")
4030            savepoint = self._parse_id_var()
4031
4032        if self._match(TokenType.AND):
4033            chain = not self._match_text_seq("NO")
4034            self._match_text_seq("CHAIN")
4035
4036        if is_rollback:
4037            return self.expression(exp.Rollback, savepoint=savepoint)
4038        return self.expression(exp.Commit, chain=chain)
4039
4040    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4041        if not self._match_text_seq("ADD"):
4042            return None
4043
4044        self._match(TokenType.COLUMN)
4045        exists_column = self._parse_exists(not_=True)
4046        expression = self._parse_column_def(self._parse_field(any_token=True))
4047
4048        if expression:
4049            expression.set("exists", exists_column)
4050
4051            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4052            if self._match_texts(("FIRST", "AFTER")):
4053                position = self._prev.text
4054                column_position = self.expression(
4055                    exp.ColumnPosition, this=self._parse_column(), position=position
4056                )
4057                expression.set("position", column_position)
4058
4059        return expression
4060
4061    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4062        drop = self._match(TokenType.DROP) and self._parse_drop()
4063        if drop and not isinstance(drop, exp.Command):
4064            drop.set("kind", drop.args.get("kind", "COLUMN"))
4065        return drop
4066
4067    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4068    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4069        return self.expression(
4070            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4071        )
4072
4073    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
4074        this = None
4075        kind = self._prev.token_type
4076
4077        if kind == TokenType.CONSTRAINT:
4078            this = self._parse_id_var()
4079
4080            if self._match_text_seq("CHECK"):
4081                expression = self._parse_wrapped(self._parse_conjunction)
4082                enforced = self._match_text_seq("ENFORCED")
4083
4084                return self.expression(
4085                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4086                )
4087
4088        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4089            expression = self._parse_foreign_key()
4090        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4091            expression = self._parse_primary_key()
4092        else:
4093            expression = None
4094
4095        return self.expression(exp.AddConstraint, this=this, expression=expression)
4096
4097    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4098        index = self._index - 1
4099
4100        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4101            return self._parse_csv(self._parse_add_constraint)
4102
4103        self._retreat(index)
4104        return self._parse_csv(self._parse_add_column)
4105
4106    def _parse_alter_table_alter(self) -> exp.Expression:
4107        self._match(TokenType.COLUMN)
4108        column = self._parse_field(any_token=True)
4109
4110        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4111            return self.expression(exp.AlterColumn, this=column, drop=True)
4112        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4113            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4114
4115        self._match_text_seq("SET", "DATA")
4116        return self.expression(
4117            exp.AlterColumn,
4118            this=column,
4119            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4120            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4121            using=self._match(TokenType.USING) and self._parse_conjunction(),
4122        )
4123
4124    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4125        index = self._index - 1
4126
4127        partition_exists = self._parse_exists()
4128        if self._match(TokenType.PARTITION, advance=False):
4129            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4130
4131        self._retreat(index)
4132        return self._parse_csv(self._parse_drop_column)
4133
4134    def _parse_alter_table_rename(self) -> exp.Expression:
4135        self._match_text_seq("TO")
4136        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4137
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement.

        Only `ALTER TABLE <name> <action>` with a recognized action keyword is parsed
        into an exp.AlterTable; anything else — including a parse that leaves trailing
        tokens — falls back to a raw exp.Command via _parse_as_command.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        # The next token (ADD, DROP, ALTER, RENAME, ...) selects the action parser.
        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the structured parse when every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4162
    def _parse_merge(self) -> exp.Expression:
        """Parse `MERGE [INTO] target USING source ON cond WHEN ... [WHEN ...]`.

        Each WHEN clause becomes an exp.When with:
        - matched: False when NOT followed MATCHED
        - source: True for BY SOURCE, False for BY TARGET or when neither appears
        - condition: the optional AND predicate
        - then: an Insert/Update/Var(DELETE) node describing the action
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (...)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4228
4229    def _parse_show(self) -> t.Optional[exp.Expression]:
4230        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4231        if parser:
4232            return parser(self)
4233        self._advance()
4234        return self.expression(exp.Show, this=self._prev.text.upper())
4235
4236    def _parse_set_item_assignment(
4237        self, kind: t.Optional[str] = None
4238    ) -> t.Optional[exp.Expression]:
4239        index = self._index
4240
4241        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4242            return self._parse_set_transaction(global_=kind == "GLOBAL")
4243
4244        left = self._parse_primary() or self._parse_id_var()
4245
4246        if not self._match_texts(("=", "TO")):
4247            self._retreat(index)
4248            return None
4249
4250        right = self._parse_statement() or self._parse_id_var()
4251        this = self.expression(
4252            exp.EQ,
4253            this=left,
4254            expression=right,
4255        )
4256
4257        return self.expression(
4258            exp.SetItem,
4259            this=this,
4260            kind=kind,
4261        )
4262
4263    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4264        self._match_text_seq("TRANSACTION")
4265        characteristics = self._parse_csv(
4266            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4267        )
4268        return self.expression(
4269            exp.SetItem,
4270            expressions=characteristics,
4271            kind="TRANSACTION",
4272            **{"global": global_},  # type: ignore
4273        )
4274
4275    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4276        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4277        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4278
4279    def _parse_set(self) -> exp.Expression:
4280        index = self._index
4281        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4282
4283        if self._curr:
4284            self._retreat(index)
4285            return self._parse_as_command(self._prev)
4286
4287        return set_
4288
4289    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4290        for option in options:
4291            if self._match_text_seq(*option.split(" ")):
4292                return exp.Var(this=option)
4293        return None
4294
4295    def _parse_as_command(self, start: Token) -> exp.Command:
4296        while self._curr:
4297            self._advance()
4298        text = self._find_sql(start, self._prev)
4299        size = len(start.text)
4300        return exp.Command(this=text[:size], expression=text[size:])
4301
4302    def _find_parser(
4303        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4304    ) -> t.Optional[t.Callable]:
4305        if not self._curr:
4306            return None
4307
4308        index = self._index
4309        this = []
4310        while True:
4311            # The current token might be multiple words
4312            curr = self._curr.text.upper()
4313            key = curr.split(" ")
4314            this.append(curr)
4315            self._advance()
4316            result, trie = in_trie(trie, key)
4317            if result == 0:
4318                break
4319            if result == 2:
4320                subparser = parsers[" ".join(this)]
4321                return subparser
4322        self._retreat(index)
4323        return None
4324
4325    def _match(self, token_type, advance=True):
4326        if not self._curr:
4327            return None
4328
4329        if self._curr.token_type == token_type:
4330            if advance:
4331                self._advance()
4332            return True
4333
4334        return None
4335
4336    def _match_set(self, types, advance=True):
4337        if not self._curr:
4338            return None
4339
4340        if self._curr.token_type in types:
4341            if advance:
4342                self._advance()
4343            return True
4344
4345        return None
4346
4347    def _match_pair(self, token_type_a, token_type_b, advance=True):
4348        if not self._curr or not self._next:
4349            return None
4350
4351        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4352            if advance:
4353                self._advance(2)
4354            return True
4355
4356        return None
4357
4358    def _match_l_paren(self, expression=None):
4359        if not self._match(TokenType.L_PAREN):
4360            self.raise_error("Expecting (")
4361        if expression and self._prev_comments:
4362            expression.comments = self._prev_comments
4363
4364    def _match_r_paren(self, expression=None):
4365        if not self._match(TokenType.R_PAREN):
4366            self.raise_error("Expecting )")
4367        if expression and self._prev_comments:
4368            expression.comments = self._prev_comments
4369
4370    def _match_texts(self, texts, advance=True):
4371        if self._curr and self._curr.text.upper() in texts:
4372            if advance:
4373                self._advance()
4374            return True
4375        return False
4376
4377    def _match_text_seq(self, *texts, advance=True):
4378        index = self._index
4379        for text in texts:
4380            if self._curr and self._curr.text.upper() == text:
4381                self._advance()
4382            else:
4383                self._retreat(index)
4384                return False
4385
4386        if not advance:
4387            self._retreat(index)
4388
4389        return True
4390
4391    def _replace_columns_with_dots(self, this):
4392        if isinstance(this, exp.Dot):
4393            exp.replace_children(this, self._replace_columns_with_dots)
4394        elif isinstance(this, exp.Column):
4395            exp.replace_children(this, self._replace_columns_with_dots)
4396            table = this.args.get("table")
4397            this = (
4398                self.expression(exp.Dot, this=table, expression=this.this)
4399                if table
4400                else self.expression(exp.Var, this=this.name)
4401            )
4402        elif isinstance(this, exp.Identifier):
4403            this = self.expression(exp.Var, this=this.name)
4404        return this
4405
    def _replace_lambda(self, node, lambda_variables):
        # Rewrite references to lambda parameters inside `node`: any Column whose
        # first name part is a lambda variable is replaced in place by a bare
        # identifier (or a Dot chain when the column is qualified), so that it is
        # no longer treated as a table column. Returns the (possibly replaced) node.
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column sits inside a Dot chain, walk up to the outermost
                # Dot and replace that whole chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # while/else: only runs when the loop never broke, i.e. the
                    # column is not part of a Dot chain. If the column *is* the
                    # root node, rebind the return value instead of mutating.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
  • index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
787    def __init__(
788        self,
789        error_level: t.Optional[ErrorLevel] = None,
790        error_message_context: int = 100,
791        index_offset: int = 0,
792        unnest_column_only: bool = False,
793        alias_post_tablesample: bool = False,
794        max_errors: int = 3,
795        null_ordering: t.Optional[str] = None,
796    ):
797        self.error_level = error_level or ErrorLevel.IMMEDIATE
798        self.error_message_context = error_message_context
799        self.index_offset = index_offset
800        self.unnest_column_only = unnest_column_only
801        self.alias_post_tablesample = alias_post_tablesample
802        self.max_errors = max_errors
803        self.null_ordering = null_ordering
804        self.reset()
def reset(self):
806    def reset(self):
807        self.sql = ""
808        self.errors = []
809        self._tokens = []
810        self._index = 0
811        self._curr = None
812        self._next = None
813        self._prev = None
814        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
816    def parse(
817        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
818    ) -> t.List[t.Optional[exp.Expression]]:
819        """
820        Parses a list of tokens and returns a list of syntax trees, one tree
821        per parsed SQL statement.
822
823        Args:
824            raw_tokens: the list of tokens.
825            sql: the original SQL string, used to produce helpful debug messages.
826
827        Returns:
828            The list of syntax trees.
829        """
830        return self._parse(
831            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
832        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
834    def parse_into(
835        self,
836        expression_types: exp.IntoType,
837        raw_tokens: t.List[Token],
838        sql: t.Optional[str] = None,
839    ) -> t.List[t.Optional[exp.Expression]]:
840        """
841        Parses a list of tokens into a given Expression type. If a collection of Expression
842        types is given instead, this method will try to parse the token list into each one
843        of them, stopping at the first for which the parsing succeeds.
844
845        Args:
846            expression_types: the expression type(s) to try and parse the token list into.
847            raw_tokens: the list of tokens.
848            sql: the original SQL string, used to produce helpful debug messages.
849
850        Returns:
851            The target Expression.
852        """
853        errors = []
854        for expression_type in ensure_collection(expression_types):
855            parser = self.EXPRESSION_PARSERS.get(expression_type)
856            if not parser:
857                raise TypeError(f"No parser registered for {expression_type}")
858            try:
859                return self._parse(parser, raw_tokens, sql)
860            except ParseError as e:
861                e.errors[0]["into_expression"] = expression_type
862                errors.append(e)
863        raise ParseError(
864            f"Failed to parse into {expression_types}",
865            errors=merge_errors(errors),
866        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
902    def check_errors(self) -> None:
903        """
904        Logs or raises any found errors, depending on the chosen error level setting.
905        """
906        if self.error_level == ErrorLevel.WARN:
907            for error in self.errors:
908                logger.error(str(error))
909        elif self.error_level == ErrorLevel.RAISE and self.errors:
910            raise ParseError(
911                concat_messages(self.errors, self.max_errors),
912                errors=merge_errors(self.errors),
913            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
915    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
916        """
917        Appends an error in the list of recorded errors or raises it, depending on the chosen
918        error level setting.
919        """
920        token = token or self._curr or self._prev or Token.string("")
921        start = token.start
922        end = token.end
923        start_context = self.sql[max(start - self.error_message_context, 0) : start]
924        highlight = self.sql[start:end]
925        end_context = self.sql[end : end + self.error_message_context]
926
927        error = ParseError.new(
928            f"{message}. Line {token.line}, Col: {token.col}.\n"
929            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
930            description=message,
931            line=token.line,
932            col=token.col,
933            start_context=start_context,
934            highlight=highlight,
935            end_context=end_context,
936        )
937
938        if self.error_level == ErrorLevel.IMMEDIATE:
939            raise error
940
941        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
943    def expression(
944        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
945    ) -> E:
946        """
947        Creates a new, validated Expression.
948
949        Args:
950            exp_class: the expression class to instantiate.
951            comments: an optional list of comments to attach to the expression.
952            kwargs: the arguments to set for the expression along with their respective values.
953
954        Returns:
955            The target expression.
956        """
957        instance = exp_class(**kwargs)
958        if self._prev_comments:
959            instance.comments = self._prev_comments
960            self._prev_comments = None
961        if comments:
962            instance.comments = comments
963        self.validate_expression(instance)
964        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
966    def validate_expression(
967        self, expression: exp.Expression, args: t.Optional[t.List] = None
968    ) -> None:
969        """
970        Validates an already instantiated expression, making sure that all its mandatory arguments
971        are set.
972
973        Args:
974            expression: the expression to validate.
975            args: an optional list of items that was used to instantiate the expression, if it's a Func.
976        """
977        if self.error_level == ErrorLevel.IGNORE:
978            return
979
980        for error_message in expression.error_messages(args):
981            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.