sqlglot.parser — module source listing (the "Edit on GitHub" link belongs to the documentation page, not the module)

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import (
    apply_index_offset,
    count_params,
    ensure_collection,
    ensure_list,
    seq_get,
)
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

# Shared logger for all parser diagnostics.
logger = logging.getLogger("sqlglot")
  20
  21
  22def parse_var_map(args):
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34class _Parser(type):
  35    def __new__(cls, clsname, bases, attrs):
  36        klass = super().__new__(cls, clsname, bases, attrs)
  37        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  38        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  39        return klass
  40
  41
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Function-name -> builder callable; each builder turns an argument list
    # into the corresponding expression node. Most entries come from the
    # generated sql_names of exp.ALL_FUNCTIONS; the rest are hand-written
    # rewrites into equivalent expression trees.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT then take the first 10 chars (the YYYY-MM-DD prefix).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }

    # Functions that may appear without parentheses, keyed by token type.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (not a datetime
    # expression) — confirm this is intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }
  92
    # Container types that can nest other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # Every token that can start a data type in a CAST / column definition.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Quantifiers that may precede a subquery (e.g. = ANY (SELECT ...)).
    # SOME is an alias of ANY per the SQL standard.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }
 157
    # Tokens that can never be used as bare identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Keyword tokens that are nonetheless allowed as identifier / variable
    # names (many dialects treat these as non-reserved words).
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens allowed as a table alias: identifier tokens minus keywords that
    # would be ambiguous right after a table expression (e.g. LEFT could
    # start a join).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # In UPDATE statements, SET introduces the assignment list and therefore
    # cannot serve as the target-table alias.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid trim specifications for TRIM(LEADING/TRAILING/BOTH ... FROM ...).
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 250
    # Tokens that may appear as a function name before a parenthesized
    # argument list.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The tables below map operator tokens to expression classes, one table
    # per precedence level of the expression grammar.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # Additive-level operators.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Multiplicative-level operators.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like types used when disambiguating literals such as TIME '...'.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }
 329
    # Set operations that combine two selects.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN side (LEFT/RIGHT/FULL) and kind (INNER/OUTER/...) keywords.
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda-introducing tokens. ARROW builds a lambda whose parameter names
    # shadow columns in the body; FARROW builds a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that may follow a column expression. DOT is handled
    # specially (None); the arrow family maps to JSON/JSONB extraction.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 398
    # Maps a target Expression class to the bound parse method used by
    # parse_into() to parse tokens directly into that node type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Maps a statement's leading token to the parse method for that
    # statement kind.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 450
    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression parsers, keyed by the current token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder syntaxes (?, @param, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators that take an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }
 511
    # CREATE-statement property keyword -> parser. Several entries inspect
    # self._prev (the keyword just consumed) to detect NO/DUAL/DEFAULT
    # modifiers that precede the property name.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }
 590
    # Column-constraint keyword -> parser used inside column definitions.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 632
    # Special-form functions that are parsed without a parenthesized
    # argument list (CASE ... END, IF ..., ANY ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (keyword-style
    # arguments such as CAST(x AS t), EXTRACT(part FROM x), ...).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Clauses attached to a query after its SELECT list, in the order the
    # modifiers dict keys them.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Dialect hooks: populated by subclasses; the _Parser metaclass builds
    # word tries from these keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted after CREATE / DROP.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # BEGIN <kind> TRANSACTION modifiers.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # ROWS would be ambiguous as a window alias (it starts a frame spec).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    # Tokens that may follow ALTER TABLE ... ADD.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST failures should be strict (CAST) vs lenient (TRY_CAST).
    STRICT_CAST = True
 691
    # Fixed attribute set: avoids a per-instance __dict__, since parsers can
    # be created in bulk. Attributes prefixed with "_" hold transient
    # per-parse cursor state managed by reset()/_parse().
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 711
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for the meaning of each setting.
        # A falsy error_level falls back to IMMEDIATE.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Initialize the transient per-parse state (sql, errors, cursor).
        self.reset()
 730
 731    def reset(self):
 732        self.sql = ""
 733        self.errors = []
 734        self._tokens = []
 735        self._index = 0
 736        self._curr = None
 737        self._next = None
 738        self._prev = None
 739        self._prev_comments = None
 740
 741    def parse(
 742        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 743    ) -> t.List[t.Optional[exp.Expression]]:
 744        """
 745        Parses a list of tokens and returns a list of syntax trees, one tree
 746        per parsed SQL statement.
 747
 748        Args:
 749            raw_tokens: the list of tokens.
 750            sql: the original SQL string, used to produce helpful debug messages.
 751
 752        Returns:
 753            The list of syntax trees.
 754        """
 755        return self._parse(
 756            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 757        )
 758
 759    def parse_into(
 760        self,
 761        expression_types: exp.IntoType,
 762        raw_tokens: t.List[Token],
 763        sql: t.Optional[str] = None,
 764    ) -> t.List[t.Optional[exp.Expression]]:
 765        """
 766        Parses a list of tokens into a given Expression type. If a collection of Expression
 767        types is given instead, this method will try to parse the token list into each one
 768        of them, stopping at the first for which the parsing succeeds.
 769
 770        Args:
 771            expression_types: the expression type(s) to try and parse the token list into.
 772            raw_tokens: the list of tokens.
 773            sql: the original SQL string, used to produce helpful debug messages.
 774
 775        Returns:
 776            The target Expression.
 777        """
 778        errors = []
 779        for expression_type in ensure_collection(expression_types):
 780            parser = self.EXPRESSION_PARSERS.get(expression_type)
 781            if not parser:
 782                raise TypeError(f"No parser registered for {expression_type}")
 783            try:
 784                return self._parse(parser, raw_tokens, sql)
 785            except ParseError as e:
 786                e.errors[0]["into_expression"] = expression_type
 787                errors.append(e)
 788        raise ParseError(
 789            f"Failed to parse into {expression_types}",
 790            errors=merge_errors(errors),
 791        ) from errors[-1]
 792
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Split the token stream on semicolons and apply `parse_method` to each chunk.

        Args:
            parse_method: unbound parser method invoked once per statement chunk.
            raw_tokens: the full token list, possibly spanning several statements.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            One (possibly None) expression tree per statement chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Partition tokens into per-statement chunks, dropping the semicolons.
        # A trailing semicolon does not start a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start at -1 so the first _advance() positions the cursor on token 0.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the parser stopped before consuming the whole
            # statement; surface that instead of silently ignoring the tail.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 826
 827    def check_errors(self) -> None:
 828        """
 829        Logs or raises any found errors, depending on the chosen error level setting.
 830        """
 831        if self.error_level == ErrorLevel.WARN:
 832            for error in self.errors:
 833                logger.error(str(error))
 834        elif self.error_level == ErrorLevel.RAISE and self.errors:
 835            raise ParseError(
 836                concat_messages(self.errors, self.max_errors),
 837                errors=merge_errors(self.errors),
 838            )
 839
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on
        the chosen error level setting.

        Args:
            message: the error description.
            token: token to anchor the error position on; defaults to the current
                (or most recently consumed) token.

        Raises:
            ParseError: when error_level is ErrorLevel.IMMEDIATE.
        """
        token = token or self._curr or self._prev or Token.string("")
        # Locate the token in the raw SQL so the message can include surrounding
        # context, with the offending span underlined via ANSI escape codes.
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 867
 868    def expression(
 869        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 870    ) -> exp.Expression:
 871        """
 872        Creates a new, validated Expression.
 873
 874        Args:
 875            exp_class: the expression class to instantiate.
 876            comments: an optional list of comments to attach to the expression.
 877            kwargs: the arguments to set for the expression along with their respective values.
 878
 879        Returns:
 880            The target expression.
 881        """
 882        instance = exp_class(**kwargs)
 883        if self._prev_comments:
 884            instance.comments = self._prev_comments
 885            self._prev_comments = None
 886        if comments:
 887            instance.comments = comments
 888        self.validate_expression(instance)
 889        return instance
 890
 891    def validate_expression(
 892        self, expression: exp.Expression, args: t.Optional[t.List] = None
 893    ) -> None:
 894        """
 895        Validates an already instantiated expression, making sure that all its mandatory arguments
 896        are set.
 897
 898        Args:
 899            expression: the expression to validate.
 900            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 901        """
 902        if self.error_level == ErrorLevel.IGNORE:
 903            return
 904
 905        for error_message in expression.error_messages(args):
 906            self.raise_error(error_message)
 907
 908    def _find_sql(self, start: Token, end: Token) -> str:
 909        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 910
 911    def _find_token(self, token: Token) -> int:
 912        line = 1
 913        col = 1
 914        index = 0
 915
 916        while line < token.line or col < token.col:
 917            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 918                line += 1
 919                col = 1
 920            else:
 921                col += 1
 922            index += 1
 923
 924        return index
 925
 926    def _advance(self, times: int = 1) -> None:
 927        self._index += times
 928        self._curr = seq_get(self._tokens, self._index)
 929        self._next = seq_get(self._tokens, self._index + 1)
 930        if self._index > 0:
 931            self._prev = self._tokens[self._index - 1]
 932            self._prev_comments = self._prev.comments
 933        else:
 934            self._prev = None
 935            self._prev_comments = None
 936
 937    def _retreat(self, index: int) -> None:
 938        self._advance(index - self._index)
 939
 940    def _parse_command(self) -> exp.Expression:
 941        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 942
 943    def _parse_statement(self) -> t.Optional[exp.Expression]:
 944        if self._curr is None:
 945            return None
 946
 947        if self._match_set(self.STATEMENT_PARSERS):
 948            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 949
 950        if self._match_set(Tokenizer.COMMANDS):
 951            return self._parse_command()
 952
 953        expression = self._parse_expression()
 954        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 955
 956        self._parse_query_modifiers(expression)
 957        return expression
 958
 959    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
 960        start = self._prev
 961        temporary = self._match(TokenType.TEMPORARY)
 962        materialized = self._match(TokenType.MATERIALIZED)
 963        kind = self._match_set(self.CREATABLES) and self._prev.text
 964        if not kind:
 965            if default_kind:
 966                kind = default_kind
 967            else:
 968                return self._parse_as_command(start)
 969
 970        return self.expression(
 971            exp.Drop,
 972            exists=self._parse_exists(),
 973            this=self._parse_table(schema=True),
 974            kind=kind,
 975            temporary=temporary,
 976            materialized=materialized,
 977            cascade=self._match(TokenType.CASCADE),
 978        )
 979
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS, consuming the tokens as a side effect.
        # Note: returns a falsy value (None/False) rather than strictly bool when
        # the sequence does not match, because `and` short-circuits.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
 986
 987    def _parse_create(self) -> t.Optional[exp.Expression]:
 988        start = self._prev
 989        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
 990            TokenType.OR, TokenType.REPLACE
 991        )
 992        unique = self._match(TokenType.UNIQUE)
 993
 994        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
 995            self._match(TokenType.TABLE)
 996
 997        properties = None
 998        create_token = self._match_set(self.CREATABLES) and self._prev
 999
1000        if not create_token:
1001            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1002            create_token = self._match_set(self.CREATABLES) and self._prev
1003
1004            if not properties or not create_token:
1005                return self._parse_as_command(start)
1006
1007        exists = self._parse_exists(not_=True)
1008        this = None
1009        expression = None
1010        indexes = None
1011        no_schema_binding = None
1012        begin = None
1013
1014        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1015            this = self._parse_user_defined_function(kind=create_token.token_type)
1016            temp_properties = self._parse_properties()
1017            if properties and temp_properties:
1018                properties.expressions.extend(temp_properties.expressions)
1019            elif temp_properties:
1020                properties = temp_properties
1021
1022            self._match(TokenType.ALIAS)
1023            begin = self._match(TokenType.BEGIN)
1024            return_ = self._match_text_seq("RETURN")
1025            expression = self._parse_statement()
1026
1027            if return_:
1028                expression = self.expression(exp.Return, this=expression)
1029        elif create_token.token_type == TokenType.INDEX:
1030            this = self._parse_index()
1031        elif create_token.token_type in (
1032            TokenType.TABLE,
1033            TokenType.VIEW,
1034            TokenType.SCHEMA,
1035        ):
1036            table_parts = self._parse_table_parts(schema=True)
1037
1038            # exp.Properties.Location.POST_NAME
1039            if self._match(TokenType.COMMA):
1040                temp_properties = self._parse_properties(before=True)
1041                if properties and temp_properties:
1042                    properties.expressions.extend(temp_properties.expressions)
1043                elif temp_properties:
1044                    properties = temp_properties
1045
1046            this = self._parse_schema(this=table_parts)
1047
1048            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1049            temp_properties = self._parse_properties()
1050            if properties and temp_properties:
1051                properties.expressions.extend(temp_properties.expressions)
1052            elif temp_properties:
1053                properties = temp_properties
1054
1055            self._match(TokenType.ALIAS)
1056
1057            # exp.Properties.Location.POST_ALIAS
1058            if not (
1059                self._match(TokenType.SELECT, advance=False)
1060                or self._match(TokenType.WITH, advance=False)
1061                or self._match(TokenType.L_PAREN, advance=False)
1062            ):
1063                temp_properties = self._parse_properties()
1064                if properties and temp_properties:
1065                    properties.expressions.extend(temp_properties.expressions)
1066                elif temp_properties:
1067                    properties = temp_properties
1068
1069            expression = self._parse_ddl_select()
1070
1071            if create_token.token_type == TokenType.TABLE:
1072                # exp.Properties.Location.POST_EXPRESSION
1073                temp_properties = self._parse_properties()
1074                if properties and temp_properties:
1075                    properties.expressions.extend(temp_properties.expressions)
1076                elif temp_properties:
1077                    properties = temp_properties
1078
1079                indexes = []
1080                while True:
1081                    index = self._parse_create_table_index()
1082
1083                    # exp.Properties.Location.POST_INDEX
1084                    if self._match(TokenType.PARTITION_BY, advance=False):
1085                        temp_properties = self._parse_properties()
1086                        if properties and temp_properties:
1087                            properties.expressions.extend(temp_properties.expressions)
1088                        elif temp_properties:
1089                            properties = temp_properties
1090
1091                    if not index:
1092                        break
1093                    else:
1094                        indexes.append(index)
1095            elif create_token.token_type == TokenType.VIEW:
1096                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1097                    no_schema_binding = True
1098
1099        return self.expression(
1100            exp.Create,
1101            this=this,
1102            kind=create_token.text,
1103            unique=unique,
1104            expression=expression,
1105            exists=exists,
1106            properties=properties,
1107            replace=replace,
1108            indexes=indexes,
1109            no_schema_binding=no_schema_binding,
1110            begin=begin,
1111        )
1112
1113    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1114        self._match(TokenType.COMMA)
1115
1116        # parsers look to _prev for no/dual/default, so need to consume first
1117        self._match_text_seq("NO")
1118        self._match_text_seq("DUAL")
1119        self._match_text_seq("DEFAULT")
1120
1121        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1122            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1123
1124        return None
1125
1126    def _parse_property(self) -> t.Optional[exp.Expression]:
1127        if self._match_texts(self.PROPERTY_PARSERS):
1128            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1129
1130        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1131            return self._parse_character_set(default=True)
1132
1133        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1134            return self._parse_sortkey(compound=True)
1135
1136        if self._match_text_seq("SQL", "SECURITY"):
1137            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1138
1139        assignment = self._match_pair(
1140            TokenType.VAR, TokenType.EQ, advance=False
1141        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1142
1143        if assignment:
1144            key = self._parse_var_or_string()
1145            self._match(TokenType.EQ)
1146            return self.expression(exp.Property, this=key, value=self._parse_column())
1147
1148        return None
1149
1150    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1151        self._match(TokenType.EQ)
1152        self._match(TokenType.ALIAS)
1153        return self.expression(
1154            exp_class,
1155            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1156        )
1157
1158    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1159        properties = []
1160
1161        while True:
1162            if before:
1163                identified_property = self._parse_property_before()
1164            else:
1165                identified_property = self._parse_property()
1166
1167            if not identified_property:
1168                break
1169            for p in ensure_collection(identified_property):
1170                properties.append(p)
1171
1172        if properties:
1173            return self.expression(exp.Properties, expressions=properties)
1174
1175        return None
1176
1177    def _parse_fallback(self, no=False) -> exp.Expression:
1178        self._match_text_seq("FALLBACK")
1179        return self.expression(
1180            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1181        )
1182
1183    def _parse_with_property(
1184        self,
1185    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1186        self._match(TokenType.WITH)
1187        if self._match(TokenType.L_PAREN, advance=False):
1188            return self._parse_wrapped_csv(self._parse_property)
1189
1190        if self._match_text_seq("JOURNAL"):
1191            return self._parse_withjournaltable()
1192
1193        if self._match_text_seq("DATA"):
1194            return self._parse_withdata(no=False)
1195        elif self._match_text_seq("NO", "DATA"):
1196            return self._parse_withdata(no=True)
1197
1198        if not self._next:
1199            return None
1200
1201        return self._parse_withisolatedloading()
1202
1203    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1204    def _parse_definer(self) -> t.Optional[exp.Expression]:
1205        self._match(TokenType.EQ)
1206
1207        user = self._parse_id_var()
1208        self._match(TokenType.PARAMETER)
1209        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1210
1211        if not user or not host:
1212            return None
1213
1214        return exp.DefinerProperty(this=f"{user}@{host}")
1215
1216    def _parse_withjournaltable(self) -> exp.Expression:
1217        self._match(TokenType.TABLE)
1218        self._match(TokenType.EQ)
1219        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1220
1221    def _parse_log(self, no=False) -> exp.Expression:
1222        self._match_text_seq("LOG")
1223        return self.expression(exp.LogProperty, no=no)
1224
1225    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1226        before = self._match_text_seq("BEFORE")
1227        self._match_text_seq("JOURNAL")
1228        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1229
1230    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1231        self._match_text_seq("NOT")
1232        self._match_text_seq("LOCAL")
1233        self._match_text_seq("AFTER", "JOURNAL")
1234        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1235
1236    def _parse_checksum(self) -> exp.Expression:
1237        self._match_text_seq("CHECKSUM")
1238        self._match(TokenType.EQ)
1239
1240        on = None
1241        if self._match(TokenType.ON):
1242            on = True
1243        elif self._match_text_seq("OFF"):
1244            on = False
1245        default = self._match(TokenType.DEFAULT)
1246
1247        return self.expression(
1248            exp.ChecksumProperty,
1249            on=on,
1250            default=default,
1251        )
1252
1253    def _parse_freespace(self) -> exp.Expression:
1254        self._match_text_seq("FREESPACE")
1255        self._match(TokenType.EQ)
1256        return self.expression(
1257            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1258        )
1259
1260    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1261        self._match_text_seq("MERGEBLOCKRATIO")
1262        if self._match(TokenType.EQ):
1263            return self.expression(
1264                exp.MergeBlockRatioProperty,
1265                this=self._parse_number(),
1266                percent=self._match(TokenType.PERCENT),
1267            )
1268        else:
1269            return self.expression(
1270                exp.MergeBlockRatioProperty,
1271                no=no,
1272                default=default,
1273            )
1274
1275    def _parse_datablocksize(self, default=None) -> exp.Expression:
1276        if default:
1277            self._match_text_seq("DATABLOCKSIZE")
1278            return self.expression(exp.DataBlocksizeProperty, default=True)
1279        elif self._match_texts(("MIN", "MINIMUM")):
1280            self._match_text_seq("DATABLOCKSIZE")
1281            return self.expression(exp.DataBlocksizeProperty, min=True)
1282        elif self._match_texts(("MAX", "MAXIMUM")):
1283            self._match_text_seq("DATABLOCKSIZE")
1284            return self.expression(exp.DataBlocksizeProperty, min=False)
1285
1286        self._match_text_seq("DATABLOCKSIZE")
1287        self._match(TokenType.EQ)
1288        size = self._parse_number()
1289        units = None
1290        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1291            units = self._prev.text
1292        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1293
1294    def _parse_blockcompression(self) -> exp.Expression:
1295        self._match_text_seq("BLOCKCOMPRESSION")
1296        self._match(TokenType.EQ)
1297        always = self._match_text_seq("ALWAYS")
1298        manual = self._match_text_seq("MANUAL")
1299        never = self._match_text_seq("NEVER")
1300        default = self._match_text_seq("DEFAULT")
1301        autotemp = None
1302        if self._match_text_seq("AUTOTEMP"):
1303            autotemp = self._parse_schema()
1304
1305        return self.expression(
1306            exp.BlockCompressionProperty,
1307            always=always,
1308            manual=manual,
1309            never=never,
1310            default=default,
1311            autotemp=autotemp,
1312        )
1313
1314    def _parse_withisolatedloading(self) -> exp.Expression:
1315        no = self._match_text_seq("NO")
1316        concurrent = self._match_text_seq("CONCURRENT")
1317        self._match_text_seq("ISOLATED", "LOADING")
1318        for_all = self._match_text_seq("FOR", "ALL")
1319        for_insert = self._match_text_seq("FOR", "INSERT")
1320        for_none = self._match_text_seq("FOR", "NONE")
1321        return self.expression(
1322            exp.IsolatedLoadingProperty,
1323            no=no,
1324            concurrent=concurrent,
1325            for_all=for_all,
1326            for_insert=for_insert,
1327            for_none=for_none,
1328        )
1329
1330    def _parse_locking(self) -> exp.Expression:
1331        if self._match(TokenType.TABLE):
1332            kind = "TABLE"
1333        elif self._match(TokenType.VIEW):
1334            kind = "VIEW"
1335        elif self._match(TokenType.ROW):
1336            kind = "ROW"
1337        elif self._match_text_seq("DATABASE"):
1338            kind = "DATABASE"
1339        else:
1340            kind = None
1341
1342        if kind in ("DATABASE", "TABLE", "VIEW"):
1343            this = self._parse_table_parts()
1344        else:
1345            this = None
1346
1347        if self._match(TokenType.FOR):
1348            for_or_in = "FOR"
1349        elif self._match(TokenType.IN):
1350            for_or_in = "IN"
1351        else:
1352            for_or_in = None
1353
1354        if self._match_text_seq("ACCESS"):
1355            lock_type = "ACCESS"
1356        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1357            lock_type = "EXCLUSIVE"
1358        elif self._match_text_seq("SHARE"):
1359            lock_type = "SHARE"
1360        elif self._match_text_seq("READ"):
1361            lock_type = "READ"
1362        elif self._match_text_seq("WRITE"):
1363            lock_type = "WRITE"
1364        elif self._match_text_seq("CHECKSUM"):
1365            lock_type = "CHECKSUM"
1366        else:
1367            lock_type = None
1368
1369        override = self._match_text_seq("OVERRIDE")
1370
1371        return self.expression(
1372            exp.LockingProperty,
1373            this=this,
1374            kind=kind,
1375            for_or_in=for_or_in,
1376            lock_type=lock_type,
1377            override=override,
1378        )
1379
1380    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1381        if self._match(TokenType.PARTITION_BY):
1382            return self._parse_csv(self._parse_conjunction)
1383        return []
1384
1385    def _parse_partitioned_by(self) -> exp.Expression:
1386        self._match(TokenType.EQ)
1387        return self.expression(
1388            exp.PartitionedByProperty,
1389            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1390        )
1391
1392    def _parse_withdata(self, no=False) -> exp.Expression:
1393        if self._match_text_seq("AND", "STATISTICS"):
1394            statistics = True
1395        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1396            statistics = False
1397        else:
1398            statistics = None
1399
1400        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1401
1402    def _parse_noprimaryindex(self) -> exp.Expression:
1403        self._match_text_seq("PRIMARY", "INDEX")
1404        return exp.NoPrimaryIndexProperty()
1405
1406    def _parse_oncommit(self) -> exp.Expression:
1407        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1408        return exp.OnCommitProperty()
1409
1410    def _parse_distkey(self) -> exp.Expression:
1411        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1412
1413    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1414        table = self._parse_table(schema=True)
1415        options = []
1416        while self._match_texts(("INCLUDING", "EXCLUDING")):
1417            this = self._prev.text.upper()
1418            id_var = self._parse_id_var()
1419
1420            if not id_var:
1421                return None
1422
1423            options.append(
1424                self.expression(
1425                    exp.Property,
1426                    this=this,
1427                    value=exp.Var(this=id_var.this.upper()),
1428                )
1429            )
1430        return self.expression(exp.LikeProperty, this=table, expressions=options)
1431
1432    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1433        return self.expression(
1434            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1435        )
1436
1437    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1438        self._match(TokenType.EQ)
1439        return self.expression(
1440            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1441        )
1442
1443    def _parse_returns(self) -> exp.Expression:
1444        value: t.Optional[exp.Expression]
1445        is_table = self._match(TokenType.TABLE)
1446
1447        if is_table:
1448            if self._match(TokenType.LT):
1449                value = self.expression(
1450                    exp.Schema,
1451                    this="TABLE",
1452                    expressions=self._parse_csv(self._parse_struct_kwargs),
1453                )
1454                if not self._match(TokenType.GT):
1455                    self.raise_error("Expecting >")
1456            else:
1457                value = self._parse_schema(exp.Var(this="TABLE"))
1458        else:
1459            value = self._parse_types()
1460
1461        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1462
1463    def _parse_temporary(self, global_=False) -> exp.Expression:
1464        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1465        return self.expression(exp.TemporaryProperty, global_=global_)
1466
1467    def _parse_describe(self) -> exp.Expression:
1468        kind = self._match_set(self.CREATABLES) and self._prev.text
1469        this = self._parse_table()
1470
1471        return self.expression(exp.Describe, this=this, kind=kind)
1472
1473    def _parse_insert(self) -> exp.Expression:
1474        overwrite = self._match(TokenType.OVERWRITE)
1475        local = self._match(TokenType.LOCAL)
1476
1477        this: t.Optional[exp.Expression]
1478
1479        alternative = None
1480        if self._match_text_seq("DIRECTORY"):
1481            this = self.expression(
1482                exp.Directory,
1483                this=self._parse_var_or_string(),
1484                local=local,
1485                row_format=self._parse_row_format(match_row=True),
1486            )
1487        else:
1488            if self._match(TokenType.OR):
1489                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1490
1491            self._match(TokenType.INTO)
1492            self._match(TokenType.TABLE)
1493            this = self._parse_table(schema=True)
1494
1495        return self.expression(
1496            exp.Insert,
1497            this=this,
1498            exists=self._parse_exists(),
1499            partition=self._parse_partition(),
1500            expression=self._parse_ddl_select(),
1501            overwrite=overwrite,
1502            alternative=alternative,
1503        )
1504
1505    def _parse_row(self) -> t.Optional[exp.Expression]:
1506        if not self._match(TokenType.FORMAT):
1507            return None
1508        return self._parse_row_format()
1509
1510    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1511        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1512            return None
1513
1514        if self._match_text_seq("SERDE"):
1515            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1516
1517        self._match_text_seq("DELIMITED")
1518
1519        kwargs = {}
1520
1521        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1522            kwargs["fields"] = self._parse_string()
1523            if self._match_text_seq("ESCAPED", "BY"):
1524                kwargs["escaped"] = self._parse_string()
1525        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1526            kwargs["collection_items"] = self._parse_string()
1527        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1528            kwargs["map_keys"] = self._parse_string()
1529        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1530            kwargs["lines"] = self._parse_string()
1531        if self._match_text_seq("NULL", "DEFINED", "AS"):
1532            kwargs["null"] = self._parse_string()
1533
1534        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1535
1536    def _parse_load_data(self) -> exp.Expression:
1537        local = self._match(TokenType.LOCAL)
1538        self._match_text_seq("INPATH")
1539        inpath = self._parse_string()
1540        overwrite = self._match(TokenType.OVERWRITE)
1541        self._match_pair(TokenType.INTO, TokenType.TABLE)
1542
1543        return self.expression(
1544            exp.LoadData,
1545            this=self._parse_table(schema=True),
1546            local=local,
1547            overwrite=overwrite,
1548            inpath=inpath,
1549            partition=self._parse_partition(),
1550            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1551            serde=self._match_text_seq("SERDE") and self._parse_string(),
1552        )
1553
1554    def _parse_delete(self) -> exp.Expression:
1555        self._match(TokenType.FROM)
1556
1557        return self.expression(
1558            exp.Delete,
1559            this=self._parse_table(schema=True),
1560            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1561            where=self._parse_where(),
1562        )
1563
1564    def _parse_update(self) -> exp.Expression:
1565        return self.expression(
1566            exp.Update,
1567            **{  # type: ignore
1568                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1569                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1570                "from": self._parse_from(),
1571                "where": self._parse_where(),
1572            },
1573        )
1574
1575    def _parse_uncache(self) -> exp.Expression:
1576        if not self._match(TokenType.TABLE):
1577            self.raise_error("Expecting TABLE after UNCACHE")
1578
1579        return self.expression(
1580            exp.Uncache,
1581            exists=self._parse_exists(),
1582            this=self._parse_table(schema=True),
1583        )
1584
1585    def _parse_cache(self) -> exp.Expression:
1586        lazy = self._match(TokenType.LAZY)
1587        self._match(TokenType.TABLE)
1588        table = self._parse_table(schema=True)
1589        options = []
1590
1591        if self._match(TokenType.OPTIONS):
1592            self._match_l_paren()
1593            k = self._parse_string()
1594            self._match(TokenType.EQ)
1595            v = self._parse_string()
1596            options = [k, v]
1597            self._match_r_paren()
1598
1599        self._match(TokenType.ALIAS)
1600        return self.expression(
1601            exp.Cache,
1602            this=table,
1603            lazy=lazy,
1604            options=options,
1605            expression=self._parse_select(nested=True),
1606        )
1607
1608    def _parse_partition(self) -> t.Optional[exp.Expression]:
1609        if not self._match(TokenType.PARTITION):
1610            return None
1611
1612        return self.expression(
1613            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1614        )
1615
1616    def _parse_value(self) -> exp.Expression:
1617        if self._match(TokenType.L_PAREN):
1618            expressions = self._parse_csv(self._parse_conjunction)
1619            self._match_r_paren()
1620            return self.expression(exp.Tuple, expressions=expressions)
1621
1622        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1623        # Source: https://prestodb.io/docs/current/sql/values.html
1624        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1625
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like construct: WITH + statement, SELECT, a
        parenthesized subquery/table, or VALUES.

        Args:
            nested: allow a parenthesized nested select to follow.
            table: inside parentheses, parse a table instead of a select.
            parse_subquery_alias: whether to consume an alias after a subquery.

        Returns:
            The parsed expression (with trailing set operations applied), or
            None when nothing SELECT-like is found.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level does not raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP (T-SQL) appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1699
1700    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1701        if not skip_with_token and not self._match(TokenType.WITH):
1702            return None
1703
1704        recursive = self._match(TokenType.RECURSIVE)
1705
1706        expressions = []
1707        while True:
1708            expressions.append(self._parse_cte())
1709
1710            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1711                break
1712            else:
1713                self._match(TokenType.WITH)
1714
1715        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1716
1717    def _parse_cte(self) -> exp.Expression:
1718        alias = self._parse_table_alias()
1719        if not alias or not alias.this:
1720            self.raise_error("Expected CTE to have alias")
1721
1722        self._match(TokenType.ALIAS)
1723
1724        return self.expression(
1725            exp.CTE,
1726            this=self._parse_wrapped(self._parse_statement),
1727            alias=alias,
1728        )
1729
1730    def _parse_table_alias(
1731        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1732    ) -> t.Optional[exp.Expression]:
1733        any_token = self._match(TokenType.ALIAS)
1734        alias = self._parse_id_var(
1735            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1736        )
1737        index = self._index
1738
1739        if self._match(TokenType.L_PAREN):
1740            columns = self._parse_csv(self._parse_function_parameter)
1741            self._match_r_paren() if columns else self._retreat(index)
1742        else:
1743            columns = None
1744
1745        if not alias and not columns:
1746            return None
1747
1748        return self.expression(exp.TableAlias, this=alias, columns=columns)
1749
1750    def _parse_subquery(
1751        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1752    ) -> exp.Expression:
1753        return self.expression(
1754            exp.Subquery,
1755            this=this,
1756            pivots=self._parse_pivots(),
1757            alias=self._parse_table_alias() if parse_alias else None,
1758        )
1759
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, WHERE, GROUP BY, ...) to `this`.

        Mutates `this` in place; does nothing when `this` is not one of the
        MODIFIABLES node types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        # For a bare table, a comma is not a join separator.
        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma-joined table goes into the existing FROM clause.
                # NOTE(review): assumes "from" was already set on `this` - confirm.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # Each remaining clause kind has a dedicated parser; unmatched parsers
        # return a falsy value and set nothing.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1784
1785    def _parse_hint(self) -> t.Optional[exp.Expression]:
1786        if self._match(TokenType.HINT):
1787            hints = self._parse_csv(self._parse_function)
1788            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1789                self.raise_error("Expected */ after HINT")
1790            return self.expression(exp.Hint, expressions=hints)
1791
1792        return None
1793
1794    def _parse_into(self) -> t.Optional[exp.Expression]:
1795        if not self._match(TokenType.INTO):
1796            return None
1797
1798        temp = self._match(TokenType.TEMPORARY)
1799        unlogged = self._match(TokenType.UNLOGGED)
1800        self._match(TokenType.TABLE)
1801
1802        return self.expression(
1803            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1804        )
1805
1806    def _parse_from(self) -> t.Optional[exp.Expression]:
1807        if not self._match(TokenType.FROM):
1808            return None
1809
1810        return self.expression(
1811            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1812        )
1813
1814    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1815        if not self._match(TokenType.MATCH_RECOGNIZE):
1816            return None
1817        self._match_l_paren()
1818
1819        partition = self._parse_partition_by()
1820        order = self._parse_order()
1821        measures = (
1822            self._parse_alias(self._parse_conjunction())
1823            if self._match_text_seq("MEASURES")
1824            else None
1825        )
1826
1827        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1828            rows = exp.Var(this="ONE ROW PER MATCH")
1829        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1830            text = "ALL ROWS PER MATCH"
1831            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1832                text += f" SHOW EMPTY MATCHES"
1833            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1834                text += f" OMIT EMPTY MATCHES"
1835            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1836                text += f" WITH UNMATCHED ROWS"
1837            rows = exp.Var(this=text)
1838        else:
1839            rows = None
1840
1841        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1842            text = "AFTER MATCH SKIP"
1843            if self._match_text_seq("PAST", "LAST", "ROW"):
1844                text += f" PAST LAST ROW"
1845            elif self._match_text_seq("TO", "NEXT", "ROW"):
1846                text += f" TO NEXT ROW"
1847            elif self._match_text_seq("TO", "FIRST"):
1848                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1849            elif self._match_text_seq("TO", "LAST"):
1850                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1851            after = exp.Var(this=text)
1852        else:
1853            after = None
1854
1855        if self._match_text_seq("PATTERN"):
1856            self._match_l_paren()
1857
1858            if not self._curr:
1859                self.raise_error("Expecting )", self._curr)
1860
1861            paren = 1
1862            start = self._curr
1863
1864            while self._curr and paren > 0:
1865                if self._curr.token_type == TokenType.L_PAREN:
1866                    paren += 1
1867                if self._curr.token_type == TokenType.R_PAREN:
1868                    paren -= 1
1869                end = self._prev
1870                self._advance()
1871            if paren > 0:
1872                self.raise_error("Expecting )", self._curr)
1873            pattern = exp.Var(this=self._find_sql(start, end))
1874        else:
1875            pattern = None
1876
1877        define = (
1878            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1879        )
1880        self._match_r_paren()
1881
1882        return self.expression(
1883            exp.MatchRecognize,
1884            partition_by=partition,
1885            order=order,
1886            measures=measures,
1887            rows=rows,
1888            after=after,
1889            pattern=pattern,
1890            define=define,
1891        )
1892
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY constructs.

        APPLY forms are wrapped in an `exp.Join` (CROSS APPLY has no side,
        OUTER APPLY becomes a LEFT join); plain LATERAL returns an
        `exp.Lateral`. Returns None when none of these keywords are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or
            # identifier, e.g. LATERAL FLATTEN(...) or LATERAL db.udtf(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW: a table alias optionally followed by AS column aliases.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1937
1938    def _parse_join_side_and_kind(
1939        self,
1940    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1941        return (
1942            self._match(TokenType.NATURAL) and self._prev,
1943            self._match_set(self.JOIN_SIDES) and self._prev,
1944            self._match_set(self.JOIN_KINDS) and self._prev,
1945        )
1946
1947    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
1948        natural, side, kind = self._parse_join_side_and_kind()
1949
1950        if not skip_join_token and not self._match(TokenType.JOIN):
1951            return None
1952
1953        kwargs: t.Dict[
1954            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
1955        ] = {"this": self._parse_table()}
1956
1957        if natural:
1958            kwargs["natural"] = True
1959        if side:
1960            kwargs["side"] = side.text
1961        if kind:
1962            kwargs["kind"] = kind.text
1963
1964        if self._match(TokenType.ON):
1965            kwargs["on"] = self._parse_conjunction()
1966        elif self._match(TokenType.USING):
1967            kwargs["using"] = self._parse_wrapped_id_vars()
1968
1969        return self.expression(exp.Join, **kwargs)  # type: ignore
1970
1971    def _parse_index(self) -> exp.Expression:
1972        index = self._parse_id_var()
1973        self._match(TokenType.ON)
1974        self._match(TokenType.TABLE)  # hive
1975
1976        return self.expression(
1977            exp.Index,
1978            this=index,
1979            table=self.expression(exp.Table, this=self._parse_id_var()),
1980            columns=self._parse_expression(),
1981        )
1982
1983    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
1984        unique = self._match(TokenType.UNIQUE)
1985        primary = self._match_text_seq("PRIMARY")
1986        amp = self._match_text_seq("AMP")
1987        if not self._match(TokenType.INDEX):
1988            return None
1989        index = self._parse_id_var()
1990        columns = None
1991        if self._match(TokenType.L_PAREN, advance=False):
1992            columns = self._parse_wrapped_csv(self._parse_column)
1993        return self.expression(
1994            exp.Index,
1995            this=index,
1996            columns=columns,
1997            unique=unique,
1998            primary=primary,
1999            amp=amp,
2000        )
2001
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a (possibly dotted) table name into an `exp.Table`.

        Handles up to catalog.db.table; any further dots nest into `exp.Dot`
        expressions on the table part.

        Args:
            schema: when True, never treat the first part as a function call.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: the previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2022
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like construct in a FROM/JOIN position.

        Tries, in order: lateral, UNNEST, derived VALUES, subquery, then a
        plain table name with alias, pivots, hints and TABLESAMPLE.

        Args:
            schema: parse the target as a schema (column definition list).
            alias_tokens: token types permitted as the table alias.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent: TABLESAMPLE may precede or follow the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # Table hints, e.g. WITH (NOLOCK).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table and becomes the new root node.
            table_sample.set("this", this)
            this = table_sample

        return this
2077
2078    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2079        if not self._match(TokenType.UNNEST):
2080            return None
2081
2082        expressions = self._parse_wrapped_csv(self._parse_column)
2083        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2084        alias = self._parse_table_alias()
2085
2086        if alias and self.unnest_column_only:
2087            if alias.args.get("columns"):
2088                self.raise_error("Unexpected extra column alias in unnest.")
2089            alias.set("columns", [alias.this])
2090            alias.set("this", None)
2091
2092        offset = None
2093        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2094            self._match(TokenType.ALIAS)
2095            offset = self._parse_conjunction()
2096
2097        return self.expression(
2098            exp.Unnest,
2099            expressions=expressions,
2100            ordinality=ordinality,
2101            alias=alias,
2102            offset=offset,
2103        )
2104
2105    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2106        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2107        if not is_derived and not self._match(TokenType.VALUES):
2108            return None
2109
2110        expressions = self._parse_csv(self._parse_value)
2111
2112        if is_derived:
2113            self._match_r_paren()
2114
2115        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2116
2117    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
2118        if not self._match(TokenType.TABLE_SAMPLE):
2119            return None
2120
2121        method = self._parse_var()
2122        bucket_numerator = None
2123        bucket_denominator = None
2124        bucket_field = None
2125        percent = None
2126        rows = None
2127        size = None
2128        seed = None
2129
2130        self._match_l_paren()
2131
2132        if self._match(TokenType.BUCKET):
2133            bucket_numerator = self._parse_number()
2134            self._match(TokenType.OUT_OF)
2135            bucket_denominator = bucket_denominator = self._parse_number()
2136            self._match(TokenType.ON)
2137            bucket_field = self._parse_field()
2138        else:
2139            num = self._parse_number()
2140
2141            if self._match(TokenType.PERCENT):
2142                percent = num
2143            elif self._match(TokenType.ROWS):
2144                rows = num
2145            else:
2146                size = num
2147
2148        self._match_r_paren()
2149
2150        if self._match(TokenType.SEED):
2151            seed = self._parse_wrapped(self._parse_number)
2152
2153        return self.expression(
2154            exp.TableSample,
2155            method=method,
2156            bucket_numerator=bucket_numerator,
2157            bucket_denominator=bucket_denominator,
2158            bucket_field=bucket_field,
2159            percent=percent,
2160            rows=rows,
2161            size=size,
2162            seed=seed,
2163        )
2164
2165    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2166        return list(iter(self._parse_pivot, None))
2167
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause, or return None (with backtracking)."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause - rewind past the PIVOT/UNPIVOT token.
            self._retreat(index)
            return None

        # PIVOT aggregates functions; UNPIVOT lists plain columns.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # An alias may follow, unless another PIVOT/UNPIVOT starts immediately.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2208
2209    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2210        if not skip_where_token and not self._match(TokenType.WHERE):
2211            return None
2212
2213        return self.expression(
2214            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2215        )
2216
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, collecting expressions, GROUPING SETS, ROLLUP and CUBE.

        Loops because these elements may be interleaved and repeated.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP (...) / CUBE (...)
            # store the parsed column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once a pass consumed nothing.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2248
2249    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2250        if not self._match(TokenType.GROUPING_SETS):
2251            return None
2252
2253        return self._parse_wrapped_csv(self._parse_grouping_set)
2254
2255    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2256        if self._match(TokenType.L_PAREN):
2257            grouping_set = self._parse_csv(self._parse_column)
2258            self._match_r_paren()
2259            return self.expression(exp.Tuple, expressions=grouping_set)
2260
2261        return self._parse_column()
2262
2263    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2264        if not skip_having_token and not self._match(TokenType.HAVING):
2265            return None
2266        return self.expression(exp.Having, this=self._parse_conjunction())
2267
2268    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2269        if not self._match(TokenType.QUALIFY):
2270            return None
2271        return self.expression(exp.Qualify, this=self._parse_conjunction())
2272
2273    def _parse_order(
2274        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2275    ) -> t.Optional[exp.Expression]:
2276        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2277            return this
2278
2279        return self.expression(
2280            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2281        )
2282
2283    def _parse_sort(
2284        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2285    ) -> t.Optional[exp.Expression]:
2286        if not self._match(token_type):
2287            return None
2288        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2289
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY key with ASC/DESC and NULLS FIRST/LAST.

        When the null ordering is not explicit, `nulls_first` is derived from
        the dialect's `null_ordering` setting so semantics survive transpiling.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        # Normalize _match results (truthy or None) into plain booleans.
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2311
2312    def _parse_limit(
2313        self, this: t.Optional[exp.Expression] = None, top: bool = False
2314    ) -> t.Optional[exp.Expression]:
2315        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2316            limit_paren = self._match(TokenType.L_PAREN)
2317            limit_exp = self.expression(
2318                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2319            )
2320
2321            if limit_paren:
2322                self._match_r_paren()
2323
2324            return limit_exp
2325
2326        if self._match(TokenType.FETCH):
2327            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2328            direction = self._prev.text if direction else "FIRST"
2329            count = self._parse_number()
2330            self._match_set((TokenType.ROW, TokenType.ROWS))
2331            self._match(TokenType.ONLY)
2332            return self.expression(exp.Fetch, direction=direction, count=count)
2333
2334        return this
2335
2336    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2337        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2338            return this
2339
2340        count = self._parse_number()
2341        self._match_set((TokenType.ROW, TokenType.ROWS))
2342        return self.expression(exp.Offset, this=this, expression=count)
2343
2344    def _parse_lock(self) -> t.Optional[exp.Expression]:
2345        if self._match_text_seq("FOR", "UPDATE"):
2346            return self.expression(exp.Lock, update=True)
2347        if self._match_text_seq("FOR", "SHARE"):
2348            return self.expression(exp.Lock, update=False)
2349
2350        return None
2351
2352    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2353        if not self._match_set(self.SET_OPERATIONS):
2354            return this
2355
2356        token_type = self._prev.token_type
2357
2358        if token_type == TokenType.UNION:
2359            expression = exp.Union
2360        elif token_type == TokenType.EXCEPT:
2361            expression = exp.Except
2362        else:
2363            expression = exp.Intersect
2364
2365        return self.expression(
2366            expression,
2367            this=this,
2368            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2369            expression=self._parse_set_operations(self._parse_select(nested=True)),
2370        )
2371
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())
2374
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse expressions joined by the CONJUNCTION operators (AND/OR)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2377
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse expressions joined by the EQUALITY operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2380
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse expressions joined by the COMPARISON operators (<, >, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2383
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...) plus ISNULL/NOTNULL/IS."""
        this = self._parse_bitwise()
        # A NOT here negates the following range predicate, e.g. NOT BETWEEN.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the pending negation after the range parser has run.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2406
    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IS predicate (the IS token was already consumed).

        Handles `IS [NOT] DISTINCT FROM` (mapped to null-safe equality) and
        `IS [NOT] NULL/TRUE/FALSE`.
        """
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM == null-safe equality; IS DISTINCT FROM == null-safe inequality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this
2419
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IN predicate (the IN token was already consumed).

        Supports `IN UNNEST(...)`, a parenthesized subquery or expression list,
        and a bare field (dialect-specific, e.g. `x IN y`).
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subqueryable becomes `query=`, anything else an expression list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2437
2438    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2439        low = self._parse_bitwise()
2440        self._match(TokenType.AND)
2441        high = self._parse_bitwise()
2442        return self.expression(exp.Between, this=this, low=low, high=high)
2443
2444    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2445        if not self._match(TokenType.ESCAPE):
2446            return this
2447        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2448
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise operators over terms.

        Shift operators are tokenized as two adjacent </> tokens, so they are
        matched as pairs rather than via the BITWISE table.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2471
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse operators in `self.TERM` (e.g. +/-) over `_parse_factor` operands."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2474
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse operators in `self.FACTOR` (e.g. */÷) over `_parse_unary` operands."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2477
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary-prefixed expression, falling through to typed/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
2482
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals or a `TYPE expr`-style cast; otherwise a plain column.

        Backtracks (via `_retreat`) when a token sequence that looked like a type
        turns out to be a column reference instead.
        """
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # `TYPE expr` is treated as a cast of expr to TYPE.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type with no args and no operand: reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2500
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, returning a DataType node.

        Args:
            check_func: when True, a type-like token followed by arguments is only
                accepted as a type if a string literal follows (e.g. `DATE '2020-01-01'`);
                otherwise the parser retreats so the caller can treat it as a function.

        Returns None (after retreating) when the tokens do not form a type.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized type arguments, e.g. DECIMAL(10, 2) or STRUCT(a INT).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a type application; backtrack entirely.
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # Postgres-style array suffixes: INT[], INT[][], ...
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` after a type token means this was a bracketed expression, not a type.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        # Angle-bracket syntax for nested types, e.g. ARRAY<INT> or STRUCT<a INT>,
        # optionally followed by literal values: ARRAY<INT>[1, 2].
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        # Normalize timestamp/time types with WITH/WITHOUT TIME ZONE modifiers.
        if type_token in self.TIMESTAMPS:
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone modifier rules out the function interpretation.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            # Only a following string literal confirms the literal-with-type form.
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2609
2610    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2611        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2612            return self._parse_types()
2613
2614        this = self._parse_id_var()
2615        self._match(TokenType.COLON)
2616        data_type = self._parse_types()
2617
2618        if not data_type:
2619            return None
2620        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2621
2622    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2623        if not self._match(TokenType.AT_TIME_ZONE):
2624            return this
2625        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2626
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column reference.

        Handles chains of column operators (e.g. `.`/`::`/JSON extraction),
        promoting dotted parts into table/db/catalog qualifiers where possible.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take a literal right-hand side.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2675
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or parenthesized form.

        Parenthesized forms may be a subquery (with set operations), a tuple,
        or a simple parenthesized expression.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> 'ab' semantics via Concat).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Leading-dot decimal literal, e.g. `.5`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve comments that preceded the opening paren.
            if this and comments:
                this.comments = comments

            return this

        return None
2722
    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier (in that order)."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2725
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to specialized parsers where registered.

        Args:
            functions: optional override for the name -> builder mapping
                (defaults to `self.FUNCTIONS`).

        Returns None when the current tokens cannot start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed like functions but without parentheses (e.g. CASE).
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...).
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown functions are preserved as-is via Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A window specification may follow any function call.
        return self._parse_window(this)
2787
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter: an identifier with an optional column definition."""
        return self._parse_column_def(self._parse_id_var())
2790
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dot-qualified) UDF signature with its parameter list.

        Args:
            kind: unused here; presumably consumed by dialect overrides — TODO confirm.

        Returns just the name when no parameter list follows.
        """
        this = self._parse_id_var()

        # Qualified names: schema.func, db.schema.func, ...
        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
2807
2808    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2809        literal = self._parse_primary()
2810        if literal:
2811            return self.expression(exp.Introducer, this=token.text, expression=literal)
2812
2813        return self.expression(exp.Identifier, this=token.text)
2814
    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap a national-character string literal (e.g. N'...') in a National node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))
2817
    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        # A dot means the first part was the parameter's kind/namespace.
        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
2827
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1`), or fall back to an ordinary argument.

        Speculatively parses lambda parameters and retreats if no lambda arrow
        follows; the fallback path also handles DISTINCT, IGNORE/RESPECT NULLS,
        ORDER BY, and LIMIT inside function arguments.
        """
        index = self._index

        # Candidate parameter list: `(a, b)` or a single identifier.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse a normal argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default, so it is consumed but not represented.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2859
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs and constraints) attached to `this`.

        Retreats and returns `this` unchanged when no paren follows or the paren
        opens a subquery (SELECT) rather than a schema.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
2872
2873    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2874        kind = self._parse_types()
2875
2876        if self._match_text_seq("FOR", "ORDINALITY"):
2877            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2878
2879        constraints = []
2880        while True:
2881            constraint = self._parse_column_constraint()
2882            if not constraint:
2883                break
2884            constraints.append(constraint)
2885
2886        if not kind and not constraints:
2887            return this
2888
2889        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2890
    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # Wrapped form: AUTOINCREMENT(start, increment).
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # Keyword form: START <expr> INCREMENT <expr>.
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only a fully-specified pair becomes a generated-identity constraint.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
2908
2909    def _parse_compress(self) -> exp.Expression:
2910        if self._match(TokenType.L_PAREN, advance=False):
2911            return self.expression(
2912                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2913            )
2914
2915        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2916
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with optional sequence options.

        The constraint's `this` arg is True for ALWAYS and False for BY DEFAULT.
        """
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        # Optional parenthesized sequence options; each clause is independent.
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
2943
    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
2947
2948    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
2949        if self._match_text_seq("NULL"):
2950            return self.expression(exp.NotNullColumnConstraint)
2951        if self._match_text_seq("CASESPECIFIC"):
2952            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
2953        return None
2954
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint: REFERENCES, or an optionally-named constraint kind."""
        this = self._parse_references()
        if this:
            return this

        # An optional `CONSTRAINT <name>` prefix.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # May be None, or the dangling constraint name when no kind followed.
        return this
2971
2972    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2973        if not self._match(TokenType.CONSTRAINT):
2974            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
2975
2976        this = self._parse_id_var()
2977        expressions = []
2978
2979        while True:
2980            constraint = self._parse_unnamed_constraint() or self._parse_function()
2981            if not constraint:
2982                break
2983            expressions.append(constraint)
2984
2985        return self.expression(exp.Constraint, this=this, expressions=expressions)
2986
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT-name prefix.

        Args:
            constraints: keyword set to accept (defaults to all CONSTRAINT_PARSERS keys).
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        # A keyword accepted via `constraints` might still lack a registered parser.
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
2998
2999    def _parse_unique(self) -> exp.Expression:
3000        if not self._match(TokenType.L_PAREN, advance=False):
3001            return self.expression(exp.UniqueColumnConstraint)
3002        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3003
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into a list of canonical strings.

        Recognizes `ON <event> <action>`, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY, and MATCH FULL; stops at the first
        unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event word (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3040
3041    def _parse_references(self) -> t.Optional[exp.Expression]:
3042        if not self._match(TokenType.REFERENCES):
3043            return None
3044
3045        expressions = None
3046        this = self._parse_id_var()
3047
3048        if self._match(TokenType.L_PAREN, advance=False):
3049            expressions = self._parse_wrapped_id_vars()
3050
3051        options = self._parse_key_constraint_options()
3052        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3053
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES, and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" -> action string, splatted into the node below.
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE/RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3079
3080    def _parse_primary_key(self) -> exp.Expression:
3081        desc = (
3082            self._match_set((TokenType.ASC, TokenType.DESC))
3083            and self._prev.token_type == TokenType.DESC
3084        )
3085
3086        if not self._match(TokenType.L_PAREN, advance=False):
3087            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3088
3089        expressions = self._parse_wrapped_id_vars()
3090        options = self._parse_key_constraint_options()
3091        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3092
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracket/brace suffixes: indexing, slices, array literals, or DuckDB structs.

        Recurses so chained brackets (e.g. `x[1][2]`) are all consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Open-ended slice, e.g. `x[:5]`.
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize indices to the dialect's array base (self.index_offset).
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)
3121
3122    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3123        if self._match(TokenType.COLON):
3124            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3125        return this
3126
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (the CASE keyword was already consumed).

        Supports both simple CASE (with an operand) and searched CASE, an
        optional ELSE branch, and a trailing window specification.
        """
        ifs = []
        default = None

        # Simple-CASE operand; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
3148
3149    def _parse_if(self) -> t.Optional[exp.Expression]:
3150        if self._match(TokenType.L_PAREN):
3151            args = self._parse_csv(self._parse_conjunction)
3152            this = exp.If.from_arg_list(args)
3153            self.validate_expression(this, args)
3154            self._match_r_paren()
3155        else:
3156            condition = self._parse_conjunction()
3157            self._match(TokenType.THEN)
3158            true = self._parse_conjunction()
3159            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3160            self._match(TokenType.END)
3161            this = self.expression(exp.If, this=condition, true=true, false=false)
3162
3163        return self._parse_window(this)
3164
3165    def _parse_extract(self) -> exp.Expression:
3166        this = self._parse_function() or self._parse_var() or self._parse_type()
3167
3168        if self._match(TokenType.FROM):
3169            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3170
3171        if not self._match(TokenType.COMMA):
3172            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3173
3174        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3175
3176    def _parse_cast(self, strict: bool) -> exp.Expression:
3177        this = self._parse_conjunction()
3178
3179        if not self._match(TokenType.ALIAS):
3180            self.raise_error("Expected AS after CAST")
3181
3182        to = self._parse_types()
3183
3184        if not to:
3185            self.raise_error("Expected TYPE after CAST")
3186        elif to.this == exp.DataType.Type.CHAR:
3187            if self._match(TokenType.CHARACTER_SET):
3188                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3189
3190        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3191
    def _parse_string_agg(self) -> exp.Expression:
        """Parse the argument list of STRING_AGG / GROUP_CONCAT into a GroupConcat.

        Handles three shapes: a plain argument list, the Postgres form with a
        trailing ORDER BY inside the parentheses, and the form followed by
        WITHIN GROUP (ORDER BY ...). Assumes the opening paren has already
        been consumed by the caller.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Remember the position so the WITHIN GROUP probe below can back out.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3220
3221    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3222        to: t.Optional[exp.Expression]
3223        this = self._parse_column()
3224
3225        if self._match(TokenType.USING):
3226            to = self.expression(exp.CharacterSet, this=self._parse_var())
3227        elif self._match(TokenType.COMMA):
3228            to = self._parse_types()
3229        else:
3230            to = None
3231
3232        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3233
3234    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3235        args = self._parse_csv(self._parse_bitwise)
3236
3237        if self._match(TokenType.IN):
3238            return self.expression(
3239                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3240            )
3241
3242        if haystack_first:
3243            haystack = seq_get(args, 0)
3244            needle = seq_get(args, 1)
3245        else:
3246            needle = seq_get(args, 0)
3247            haystack = seq_get(args, 1)
3248
3249        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3250
3251        self.validate_expression(this, args)
3252
3253        return this
3254
3255    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3256        args = self._parse_csv(self._parse_table)
3257        return exp.JoinHint(this=func_name.upper(), expressions=args)
3258
3259    def _parse_substring(self) -> exp.Expression:
3260        # Postgres supports the form: substring(string [from int] [for int])
3261        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3262
3263        args = self._parse_csv(self._parse_bitwise)
3264
3265        if self._match(TokenType.FROM):
3266            args.append(self._parse_bitwise())
3267            if self._match(TokenType.FOR):
3268                args.append(self._parse_bitwise())
3269
3270        this = exp.Substring.from_arg_list(args)
3271        self.validate_expression(this, args)
3272
3273        return this
3274
3275    def _parse_trim(self) -> exp.Expression:
3276        # https://www.w3resource.com/sql/character-functions/trim.php
3277        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3278
3279        position = None
3280        collation = None
3281
3282        if self._match_set(self.TRIM_TYPES):
3283            position = self._prev.text.upper()
3284
3285        expression = self._parse_term()
3286        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3287            this = self._parse_term()
3288        else:
3289            this = expression
3290            expression = None
3291
3292        if self._match(TokenType.COLLATE):
3293            collation = self._parse_term()
3294
3295        return self.expression(
3296            exp.Trim,
3297            this=this,
3298            position=position,
3299            expression=expression,
3300            collation=collation,
3301        )
3302
3303    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3304        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3305
3306    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3307        return self._parse_window(self._parse_id_var(), alias=True)
3308
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of an expression: FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and OVER (...), wrapping `this` in the
        corresponding nodes; `this` is returned unchanged when no suffix
        is present.

        With `alias=True` a named window definition (`name AS (spec)`) is
        parsed instead of requiring OVER.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # `OVER window_name`: a reference to a named window, no inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE frame: [BETWEEN] <bound> AND <bound>; BETWEEN/AND are optional here.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3382
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <expr> FOLLOWING).

        Returns a dict with the bound's "value" (the matched keyword text or a
        parsed expression) and its "side" (PRECEDING / FOLLOWING text, or a
        falsy value when absent). Note the `_match_set(...) and self._prev.text`
        pattern relies on left-to-right evaluation: the match consumes the
        token before `_prev` is read.
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }
3393
3394    def _parse_alias(
3395        self, this: t.Optional[exp.Expression], explicit: bool = False
3396    ) -> t.Optional[exp.Expression]:
3397        any_token = self._match(TokenType.ALIAS)
3398
3399        if explicit and not any_token:
3400            return this
3401
3402        if self._match(TokenType.L_PAREN):
3403            aliases = self.expression(
3404                exp.Aliases,
3405                this=this,
3406                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3407            )
3408            self._match_r_paren(aliases)
3409            return aliases
3410
3411        alias = self._parse_id_var(any_token)
3412
3413        if alias:
3414            return self.expression(exp.Alias, this=this, alias=alias)
3415
3416        return this
3417
3418    def _parse_id_var(
3419        self,
3420        any_token: bool = True,
3421        tokens: t.Optional[t.Collection[TokenType]] = None,
3422        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3423    ) -> t.Optional[exp.Expression]:
3424        identifier = self._parse_identifier()
3425
3426        if identifier:
3427            return identifier
3428
3429        prefix = ""
3430
3431        if prefix_tokens:
3432            while self._match_set(prefix_tokens):
3433                prefix += self._prev.text
3434
3435        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3436            quoted = self._prev.token_type == TokenType.STRING
3437            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3438
3439        return None
3440
3441    def _parse_string(self) -> t.Optional[exp.Expression]:
3442        if self._match(TokenType.STRING):
3443            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3444        return self._parse_placeholder()
3445
3446    def _parse_number(self) -> t.Optional[exp.Expression]:
3447        if self._match(TokenType.NUMBER):
3448            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3449        return self._parse_placeholder()
3450
3451    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3452        if self._match(TokenType.IDENTIFIER):
3453            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3454        return self._parse_placeholder()
3455
3456    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3457        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3458            return self.expression(exp.Var, this=self._prev.text)
3459        return self._parse_placeholder()
3460
3461    def _advance_any(self) -> t.Optional[Token]:
3462        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3463            self._advance()
3464            return self._prev
3465        return None
3466
3467    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3468        return self._parse_var() or self._parse_string()
3469
3470    def _parse_null(self) -> t.Optional[exp.Expression]:
3471        if self._match(TokenType.NULL):
3472            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3473        return None
3474
3475    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3476        if self._match(TokenType.TRUE):
3477            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3478        if self._match(TokenType.FALSE):
3479            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3480        return None
3481
3482    def _parse_star(self) -> t.Optional[exp.Expression]:
3483        if self._match(TokenType.STAR):
3484            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3485        return None
3486
3487    def _parse_parameter(self) -> exp.Expression:
3488        wrapped = self._match(TokenType.L_BRACE)
3489        this = self._parse_var() or self._parse_primary()
3490        self._match(TokenType.R_BRACE)
3491        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3492
3493    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3494        if self._match_set(self.PLACEHOLDER_PARSERS):
3495            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3496            if placeholder:
3497                return placeholder
3498            self._advance(-1)
3499        return None
3500
3501    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3502        if not self._match(TokenType.EXCEPT):
3503            return None
3504        if self._match(TokenType.L_PAREN, advance=False):
3505            return self._parse_wrapped_csv(self._parse_column)
3506        return self._parse_csv(self._parse_column)
3507
3508    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3509        if not self._match(TokenType.REPLACE):
3510            return None
3511        if self._match(TokenType.L_PAREN, advance=False):
3512            return self._parse_wrapped_csv(self._parse_expression)
3513        return self._parse_csv(self._parse_expression)
3514
3515    def _parse_csv(
3516        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3517    ) -> t.List[t.Optional[exp.Expression]]:
3518        parse_result = parse_method()
3519        items = [parse_result] if parse_result is not None else []
3520
3521        while self._match(sep):
3522            if parse_result and self._prev_comments:
3523                parse_result.comments = self._prev_comments
3524
3525            parse_result = parse_method()
3526            if parse_result is not None:
3527                items.append(parse_result)
3528
3529        return items
3530
3531    def _parse_tokens(
3532        self, parse_method: t.Callable, expressions: t.Dict
3533    ) -> t.Optional[exp.Expression]:
3534        this = parse_method()
3535
3536        while self._match_set(expressions):
3537            this = self.expression(
3538                expressions[self._prev.token_type],
3539                this=this,
3540                comments=self._prev_comments,
3541                expression=parse_method(),
3542            )
3543
3544        return this
3545
3546    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3547        return self._parse_wrapped_csv(self._parse_id_var)
3548
3549    def _parse_wrapped_csv(
3550        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3551    ) -> t.List[t.Optional[exp.Expression]]:
3552        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3553
3554    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3555        self._match_l_paren()
3556        parse_result = parse_method()
3557        self._match_r_paren()
3558        return parse_result
3559
3560    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3561        return self._parse_select() or self._parse_expression()
3562
3563    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3564        return self._parse_set_operations(
3565            self._parse_select(nested=True, parse_subquery_alias=False)
3566        )
3567
3568    def _parse_transaction(self) -> exp.Expression:
3569        this = None
3570        if self._match_texts(self.TRANSACTION_KIND):
3571            this = self._prev.text
3572
3573        self._match_texts({"TRANSACTION", "WORK"})
3574
3575        modes = []
3576        while True:
3577            mode = []
3578            while self._match(TokenType.VAR):
3579                mode.append(self._prev.text)
3580
3581            if mode:
3582                modes.append(" ".join(mode))
3583            if not self._match(TokenType.COMMA):
3584                break
3585
3586        return self.expression(exp.Transaction, this=this, modes=modes)
3587
3588    def _parse_commit_or_rollback(self) -> exp.Expression:
3589        chain = None
3590        savepoint = None
3591        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3592
3593        self._match_texts({"TRANSACTION", "WORK"})
3594
3595        if self._match_text_seq("TO"):
3596            self._match_text_seq("SAVEPOINT")
3597            savepoint = self._parse_id_var()
3598
3599        if self._match(TokenType.AND):
3600            chain = not self._match_text_seq("NO")
3601            self._match_text_seq("CHAIN")
3602
3603        if is_rollback:
3604            return self.expression(exp.Rollback, savepoint=savepoint)
3605        return self.expression(exp.Commit, chain=chain)
3606
3607    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3608        if not self._match_text_seq("ADD"):
3609            return None
3610
3611        self._match(TokenType.COLUMN)
3612        exists_column = self._parse_exists(not_=True)
3613        expression = self._parse_column_def(self._parse_field(any_token=True))
3614
3615        if expression:
3616            expression.set("exists", exists_column)
3617
3618        return expression
3619
3620    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3621        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3622
3623    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3624    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3625        return self.expression(
3626            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3627        )
3628
3629    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3630        this = None
3631        kind = self._prev.token_type
3632
3633        if kind == TokenType.CONSTRAINT:
3634            this = self._parse_id_var()
3635
3636            if self._match_text_seq("CHECK"):
3637                expression = self._parse_wrapped(self._parse_conjunction)
3638                enforced = self._match_text_seq("ENFORCED")
3639
3640                return self.expression(
3641                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3642                )
3643
3644        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3645            expression = self._parse_foreign_key()
3646        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3647            expression = self._parse_primary_key()
3648
3649        return self.expression(exp.AddConstraint, this=this, expression=expression)
3650
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE with DELETE / ADD / DROP / RENAME TO / ALTER COLUMN
        actions; any other ALTER statement falls back to an opaque Command."""
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # Saved so the ADD/DROP probes below can rewind and re-parse the keyword.
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column sees ADD itself.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition drop: rewind so _parse_drop_column sees DROP itself.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3703
3704    def _parse_show(self) -> t.Optional[exp.Expression]:
3705        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3706        if parser:
3707            return parser(self)
3708        self._advance()
3709        return self.expression(exp.Show, this=self._prev.text.upper())
3710
3711    def _default_parse_set_item(self) -> exp.Expression:
3712        return self.expression(
3713            exp.SetItem,
3714            this=self._parse_statement(),
3715        )
3716
3717    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3718        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3719        return parser(self) if parser else self._default_parse_set_item()
3720
3721    def _parse_merge(self) -> exp.Expression:
3722        self._match(TokenType.INTO)
3723        target = self._parse_table()
3724
3725        self._match(TokenType.USING)
3726        using = self._parse_table()
3727
3728        self._match(TokenType.ON)
3729        on = self._parse_conjunction()
3730
3731        whens = []
3732        while self._match(TokenType.WHEN):
3733            this = self._parse_conjunction()
3734            self._match(TokenType.THEN)
3735
3736            if self._match(TokenType.INSERT):
3737                _this = self._parse_star()
3738                if _this:
3739                    then = self.expression(exp.Insert, this=_this)
3740                else:
3741                    then = self.expression(
3742                        exp.Insert,
3743                        this=self._parse_value(),
3744                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3745                    )
3746            elif self._match(TokenType.UPDATE):
3747                expressions = self._parse_star()
3748                if expressions:
3749                    then = self.expression(exp.Update, expressions=expressions)
3750                else:
3751                    then = self.expression(
3752                        exp.Update,
3753                        expressions=self._match(TokenType.SET)
3754                        and self._parse_csv(self._parse_equality),
3755                    )
3756            elif self._match(TokenType.DELETE):
3757                then = self.expression(exp.Var, this=self._prev.text)
3758
3759            whens.append(self.expression(exp.When, this=this, then=then))
3760
3761        return self.expression(
3762            exp.Merge,
3763            this=target,
3764            using=using,
3765            on=on,
3766            expressions=whens,
3767        )
3768
3769    def _parse_set(self) -> exp.Expression:
3770        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3771
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume every remaining token and wrap the raw SQL from `start` to
        the last token in an opaque Command node (used for unsupported syntax)."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))
3776
3777    def _find_parser(
3778        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3779    ) -> t.Optional[t.Callable]:
3780        index = self._index
3781        this = []
3782        while True:
3783            # The current token might be multiple words
3784            curr = self._curr.text.upper()
3785            key = curr.split(" ")
3786            this.append(curr)
3787            self._advance()
3788            result, trie = in_trie(trie, key)
3789            if result == 0:
3790                break
3791            if result == 2:
3792                subparser = parsers[" ".join(this)]
3793                return subparser
3794        self._retreat(index)
3795        return None
3796
3797    def _match(self, token_type, advance=True):
3798        if not self._curr:
3799            return None
3800
3801        if self._curr.token_type == token_type:
3802            if advance:
3803                self._advance()
3804            return True
3805
3806        return None
3807
3808    def _match_set(self, types, advance=True):
3809        if not self._curr:
3810            return None
3811
3812        if self._curr.token_type in types:
3813            if advance:
3814                self._advance()
3815            return True
3816
3817        return None
3818
3819    def _match_pair(self, token_type_a, token_type_b, advance=True):
3820        if not self._curr or not self._next:
3821            return None
3822
3823        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3824            if advance:
3825                self._advance(2)
3826            return True
3827
3828        return None
3829
3830    def _match_l_paren(self, expression=None):
3831        if not self._match(TokenType.L_PAREN):
3832            self.raise_error("Expecting (")
3833        if expression and self._prev_comments:
3834            expression.comments = self._prev_comments
3835
3836    def _match_r_paren(self, expression=None):
3837        if not self._match(TokenType.R_PAREN):
3838            self.raise_error("Expecting )")
3839        if expression and self._prev_comments:
3840            expression.comments = self._prev_comments
3841
3842    def _match_texts(self, texts, advance=True):
3843        if self._curr and self._curr.text.upper() in texts:
3844            if advance:
3845                self._advance()
3846            return True
3847        return False
3848
3849    def _match_text_seq(self, *texts, advance=True):
3850        index = self._index
3851        for text in texts:
3852            if self._curr and self._curr.text.upper() == text:
3853                self._advance()
3854            else:
3855                self._retreat(index)
3856                return False
3857
3858        if not advance:
3859            self._retreat(index)
3860
3861        return True
3862
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        A qualified column `tbl.col` becomes Dot(tbl, col) and a bare
        identifier becomes a Var. NOTE(review): presumably used where column
        syntax actually denotes nested field access — confirm with callers.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then the column node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
3877
3878    def _replace_lambda(self, node, lambda_variables):
3879        if isinstance(node, exp.Column):
3880            if node.name in lambda_variables:
3881                return node.this
3882        return node
def parse_var_map(args):
    """Build an exp.VarMap from a flat [key1, value1, key2, value2, ...] list."""
    keys = list(args[0::2])
    # Index (rather than slice) the values so an odd-length argument list
    # still raises IndexError, as the pairwise walk did.
    values = [args[i + 1] for i in range(0, len(args), 2)]
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser:
  43class Parser(metaclass=_Parser):
  44    """
  45    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  46    a parsed syntax tree.
  47
  48    Args:
  49        error_level: the desired error level.
  50            Default: ErrorLevel.RAISE
  51        error_message_context: determines the amount of context to capture from a
  52            query string when displaying the error message (in number of characters).
  53            Default: 50.
  54        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  55            Default: 0
  56        alias_post_tablesample: If the table alias comes after tablesample.
  57            Default: False
  58        max_errors: Maximum number of error messages to include in a raised ParseError.
  59            This is only relevant if error_level is ErrorLevel.RAISE.
  60            Default: 3
  61        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  62            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  63            Default: "nulls_are_small"
  64    """
  65
  66    FUNCTIONS: t.Dict[str, t.Callable] = {
  67        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  68        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  69            this=seq_get(args, 0),
  70            to=exp.DataType(this=exp.DataType.Type.TEXT),
  71        ),
  72        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  73            this=seq_get(args, 0),
  74            to=exp.DataType(this=exp.DataType.Type.TEXT),
  75        ),
  76        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  77            this=exp.Cast(
  78                this=seq_get(args, 0),
  79                to=exp.DataType(this=exp.DataType.Type.TEXT),
  80            ),
  81            start=exp.Literal.number(1),
  82            length=exp.Literal.number(10),
  83        ),
  84        "VAR_MAP": parse_var_map,
  85        "IFNULL": exp.Coalesce.from_arg_list,
  86    }
  87
  88    NO_PAREN_FUNCTIONS = {
  89        TokenType.CURRENT_DATE: exp.CurrentDate,
  90        TokenType.CURRENT_DATETIME: exp.CurrentDate,
  91        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
  92    }
  93
  94    NESTED_TYPE_TOKENS = {
  95        TokenType.ARRAY,
  96        TokenType.MAP,
  97        TokenType.STRUCT,
  98        TokenType.NULLABLE,
  99    }
 100
 101    TYPE_TOKENS = {
 102        TokenType.BOOLEAN,
 103        TokenType.TINYINT,
 104        TokenType.SMALLINT,
 105        TokenType.INT,
 106        TokenType.BIGINT,
 107        TokenType.FLOAT,
 108        TokenType.DOUBLE,
 109        TokenType.CHAR,
 110        TokenType.NCHAR,
 111        TokenType.VARCHAR,
 112        TokenType.NVARCHAR,
 113        TokenType.TEXT,
 114        TokenType.MEDIUMTEXT,
 115        TokenType.LONGTEXT,
 116        TokenType.MEDIUMBLOB,
 117        TokenType.LONGBLOB,
 118        TokenType.BINARY,
 119        TokenType.VARBINARY,
 120        TokenType.JSON,
 121        TokenType.JSONB,
 122        TokenType.INTERVAL,
 123        TokenType.TIME,
 124        TokenType.TIMESTAMP,
 125        TokenType.TIMESTAMPTZ,
 126        TokenType.TIMESTAMPLTZ,
 127        TokenType.DATETIME,
 128        TokenType.DATE,
 129        TokenType.DECIMAL,
 130        TokenType.UUID,
 131        TokenType.GEOGRAPHY,
 132        TokenType.GEOMETRY,
 133        TokenType.HLLSKETCH,
 134        TokenType.HSTORE,
 135        TokenType.PSEUDO_TYPE,
 136        TokenType.SUPER,
 137        TokenType.SERIAL,
 138        TokenType.SMALLSERIAL,
 139        TokenType.BIGSERIAL,
 140        TokenType.XML,
 141        TokenType.UNIQUEIDENTIFIER,
 142        TokenType.MONEY,
 143        TokenType.SMALLMONEY,
 144        TokenType.ROWVERSION,
 145        TokenType.IMAGE,
 146        TokenType.VARIANT,
 147        TokenType.OBJECT,
 148        TokenType.INET,
 149        *NESTED_TYPE_TOKENS,
 150    }
 151
 152    SUBQUERY_PREDICATES = {
 153        TokenType.ANY: exp.Any,
 154        TokenType.ALL: exp.All,
 155        TokenType.EXISTS: exp.Exists,
 156        TokenType.SOME: exp.Any,
 157    }
 158
 159    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 160
 161    ID_VAR_TOKENS = {
 162        TokenType.VAR,
 163        TokenType.ANTI,
 164        TokenType.APPLY,
 165        TokenType.AUTO_INCREMENT,
 166        TokenType.BEGIN,
 167        TokenType.BOTH,
 168        TokenType.BUCKET,
 169        TokenType.CACHE,
 170        TokenType.CASCADE,
 171        TokenType.COLLATE,
 172        TokenType.COLUMN,
 173        TokenType.COMMAND,
 174        TokenType.COMMIT,
 175        TokenType.COMPOUND,
 176        TokenType.CONSTRAINT,
 177        TokenType.CURRENT_TIME,
 178        TokenType.DEFAULT,
 179        TokenType.DELETE,
 180        TokenType.DESCRIBE,
 181        TokenType.DIV,
 182        TokenType.END,
 183        TokenType.EXECUTE,
 184        TokenType.ESCAPE,
 185        TokenType.FALSE,
 186        TokenType.FIRST,
 187        TokenType.FILTER,
 188        TokenType.FOLLOWING,
 189        TokenType.FORMAT,
 190        TokenType.FUNCTION,
 191        TokenType.IF,
 192        TokenType.INDEX,
 193        TokenType.ISNULL,
 194        TokenType.INTERVAL,
 195        TokenType.LAZY,
 196        TokenType.LEADING,
 197        TokenType.LEFT,
 198        TokenType.LOCAL,
 199        TokenType.MATERIALIZED,
 200        TokenType.MERGE,
 201        TokenType.NATURAL,
 202        TokenType.NEXT,
 203        TokenType.OFFSET,
 204        TokenType.ONLY,
 205        TokenType.OPTIONS,
 206        TokenType.ORDINALITY,
 207        TokenType.PERCENT,
 208        TokenType.PIVOT,
 209        TokenType.PRECEDING,
 210        TokenType.RANGE,
 211        TokenType.REFERENCES,
 212        TokenType.RIGHT,
 213        TokenType.ROW,
 214        TokenType.ROWS,
 215        TokenType.SCHEMA,
 216        TokenType.SEED,
 217        TokenType.SEMI,
 218        TokenType.SET,
 219        TokenType.SHOW,
 220        TokenType.SORTKEY,
 221        TokenType.TABLE,
 222        TokenType.TEMPORARY,
 223        TokenType.TOP,
 224        TokenType.TRAILING,
 225        TokenType.TRUE,
 226        TokenType.UNBOUNDED,
 227        TokenType.UNIQUE,
 228        TokenType.UNLOGGED,
 229        TokenType.UNPIVOT,
 230        TokenType.PROCEDURE,
 231        TokenType.VIEW,
 232        TokenType.VOLATILE,
 233        TokenType.WINDOW,
 234        *SUBQUERY_PREDICATES,
 235        *TYPE_TOKENS,
 236        *NO_PAREN_FUNCTIONS,
 237    }
 238
 239    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 240        TokenType.APPLY,
 241        TokenType.LEFT,
 242        TokenType.NATURAL,
 243        TokenType.OFFSET,
 244        TokenType.RIGHT,
 245        TokenType.WINDOW,
 246    }
 247
 248    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 249
 250    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 251
 252    FUNC_TOKENS = {
 253        TokenType.COMMAND,
 254        TokenType.CURRENT_DATE,
 255        TokenType.CURRENT_DATETIME,
 256        TokenType.CURRENT_TIMESTAMP,
 257        TokenType.CURRENT_TIME,
 258        TokenType.FILTER,
 259        TokenType.FIRST,
 260        TokenType.FORMAT,
 261        TokenType.IDENTIFIER,
 262        TokenType.INDEX,
 263        TokenType.ISNULL,
 264        TokenType.ILIKE,
 265        TokenType.LIKE,
 266        TokenType.MERGE,
 267        TokenType.OFFSET,
 268        TokenType.PRIMARY_KEY,
 269        TokenType.REPLACE,
 270        TokenType.ROW,
 271        TokenType.UNNEST,
 272        TokenType.VAR,
 273        TokenType.LEFT,
 274        TokenType.RIGHT,
 275        TokenType.DATE,
 276        TokenType.DATETIME,
 277        TokenType.TABLE,
 278        TokenType.TIMESTAMP,
 279        TokenType.TIMESTAMPTZ,
 280        TokenType.WINDOW,
 281        *TYPE_TOKENS,
 282        *SUBQUERY_PREDICATES,
 283    }
 284
 285    CONJUNCTION = {
 286        TokenType.AND: exp.And,
 287        TokenType.OR: exp.Or,
 288    }
 289
 290    EQUALITY = {
 291        TokenType.EQ: exp.EQ,
 292        TokenType.NEQ: exp.NEQ,
 293        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 294    }
 295
 296    COMPARISON = {
 297        TokenType.GT: exp.GT,
 298        TokenType.GTE: exp.GTE,
 299        TokenType.LT: exp.LT,
 300        TokenType.LTE: exp.LTE,
 301    }
 302
 303    BITWISE = {
 304        TokenType.AMP: exp.BitwiseAnd,
 305        TokenType.CARET: exp.BitwiseXor,
 306        TokenType.PIPE: exp.BitwiseOr,
 307        TokenType.DPIPE: exp.DPipe,
 308    }
 309
 310    TERM = {
 311        TokenType.DASH: exp.Sub,
 312        TokenType.PLUS: exp.Add,
 313        TokenType.MOD: exp.Mod,
 314        TokenType.COLLATE: exp.Collate,
 315    }
 316
 317    FACTOR = {
 318        TokenType.DIV: exp.IntDiv,
 319        TokenType.LR_ARROW: exp.Distance,
 320        TokenType.SLASH: exp.Div,
 321        TokenType.STAR: exp.Mul,
 322    }
 323
 324    TIMESTAMPS = {
 325        TokenType.TIME,
 326        TokenType.TIMESTAMP,
 327        TokenType.TIMESTAMPTZ,
 328        TokenType.TIMESTAMPLTZ,
 329    }
 330
 331    SET_OPERATIONS = {
 332        TokenType.UNION,
 333        TokenType.INTERSECT,
 334        TokenType.EXCEPT,
 335    }
 336
 337    JOIN_SIDES = {
 338        TokenType.LEFT,
 339        TokenType.RIGHT,
 340        TokenType.FULL,
 341    }
 342
 343    JOIN_KINDS = {
 344        TokenType.INNER,
 345        TokenType.OUTER,
 346        TokenType.CROSS,
 347        TokenType.SEMI,
 348        TokenType.ANTI,
 349    }
 350
 351    LAMBDAS = {
 352        TokenType.ARROW: lambda self, expressions: self.expression(
 353            exp.Lambda,
 354            this=self._parse_conjunction().transform(
 355                self._replace_lambda, {node.name for node in expressions}
 356            ),
 357            expressions=expressions,
 358        ),
 359        TokenType.FARROW: lambda self, expressions: self.expression(
 360            exp.Kwarg,
 361            this=exp.Var(this=expressions[0].name),
 362            expression=self._parse_conjunction(),
 363        ),
 364    }
 365
 366    COLUMN_OPERATORS = {
 367        TokenType.DOT: None,
 368        TokenType.DCOLON: lambda self, this, to: self.expression(
 369            exp.Cast,
 370            this=this,
 371            to=to,
 372        ),
 373        TokenType.ARROW: lambda self, this, path: self.expression(
 374            exp.JSONExtract,
 375            this=this,
 376            expression=path,
 377        ),
 378        TokenType.DARROW: lambda self, this, path: self.expression(
 379            exp.JSONExtractScalar,
 380            this=this,
 381            expression=path,
 382        ),
 383        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 384            exp.JSONBExtract,
 385            this=this,
 386            expression=path,
 387        ),
 388        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 389            exp.JSONBExtractScalar,
 390            this=this,
 391            expression=path,
 392        ),
 393        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 394            exp.JSONBContains,
 395            this=this,
 396            expression=key,
 397        ),
 398    }
 399
 400    EXPRESSION_PARSERS = {
 401        exp.Column: lambda self: self._parse_column(),
 402        exp.DataType: lambda self: self._parse_types(),
 403        exp.From: lambda self: self._parse_from(),
 404        exp.Group: lambda self: self._parse_group(),
 405        exp.Identifier: lambda self: self._parse_id_var(),
 406        exp.Lateral: lambda self: self._parse_lateral(),
 407        exp.Join: lambda self: self._parse_join(),
 408        exp.Order: lambda self: self._parse_order(),
 409        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 410        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 411        exp.Lambda: lambda self: self._parse_lambda(),
 412        exp.Limit: lambda self: self._parse_limit(),
 413        exp.Offset: lambda self: self._parse_offset(),
 414        exp.TableAlias: lambda self: self._parse_table_alias(),
 415        exp.Table: lambda self: self._parse_table(),
 416        exp.Condition: lambda self: self._parse_conjunction(),
 417        exp.Expression: lambda self: self._parse_statement(),
 418        exp.Properties: lambda self: self._parse_properties(),
 419        exp.Where: lambda self: self._parse_where(),
 420        exp.Ordered: lambda self: self._parse_ordered(),
 421        exp.Having: lambda self: self._parse_having(),
 422        exp.With: lambda self: self._parse_with(),
 423        exp.Window: lambda self: self._parse_named_window(),
 424        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 425    }
 426
 427    STATEMENT_PARSERS = {
 428        TokenType.ALTER: lambda self: self._parse_alter(),
 429        TokenType.BEGIN: lambda self: self._parse_transaction(),
 430        TokenType.CACHE: lambda self: self._parse_cache(),
 431        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 432        TokenType.CREATE: lambda self: self._parse_create(),
 433        TokenType.DELETE: lambda self: self._parse_delete(),
 434        TokenType.DESC: lambda self: self._parse_describe(),
 435        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 436        TokenType.DROP: lambda self: self._parse_drop(),
 437        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 438        TokenType.INSERT: lambda self: self._parse_insert(),
 439        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 440        TokenType.MERGE: lambda self: self._parse_merge(),
 441        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 442        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 443        TokenType.UPDATE: lambda self: self._parse_update(),
 444        TokenType.USE: lambda self: self.expression(
 445            exp.Use,
 446            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 447            and exp.Var(this=self._prev.text),
 448            this=self._parse_table(schema=False),
 449        ),
 450    }
 451
 452    UNARY_PARSERS = {
 453        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 454        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 455        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 456        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 457    }
 458
 459    PRIMARY_PARSERS = {
 460        TokenType.STRING: lambda self, token: self.expression(
 461            exp.Literal, this=token.text, is_string=True
 462        ),
 463        TokenType.NUMBER: lambda self, token: self.expression(
 464            exp.Literal, this=token.text, is_string=False
 465        ),
 466        TokenType.STAR: lambda self, _: self.expression(
 467            exp.Star,
 468            **{"except": self._parse_except(), "replace": self._parse_replace()},
 469        ),
 470        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 471        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 472        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 473        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 474        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 475        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 476        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 477        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 478        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 479    }
 480
 481    PLACEHOLDER_PARSERS = {
 482        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 483        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 484        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 485        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 486        else None,
 487    }
 488
 489    RANGE_PARSERS = {
 490        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 491        TokenType.GLOB: lambda self, this: self._parse_escape(
 492            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
 493        ),
 494        TokenType.IN: lambda self, this: self._parse_in(this),
 495        TokenType.IS: lambda self, this: self._parse_is(this),
 496        TokenType.LIKE: lambda self, this: self._parse_escape(
 497            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
 498        ),
 499        TokenType.ILIKE: lambda self, this: self._parse_escape(
 500            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
 501        ),
 502        TokenType.IRLIKE: lambda self, this: self.expression(
 503            exp.RegexpILike, this=this, expression=self._parse_bitwise()
 504        ),
 505        TokenType.RLIKE: lambda self, this: self.expression(
 506            exp.RegexpLike, this=this, expression=self._parse_bitwise()
 507        ),
 508        TokenType.SIMILAR_TO: lambda self, this: self.expression(
 509            exp.SimilarTo, this=this, expression=self._parse_bitwise()
 510        ),
 511    }
 512
 513    PROPERTY_PARSERS = {
 514        "AFTER": lambda self: self._parse_afterjournal(
 515            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 516        ),
 517        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 518        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 519        "BEFORE": lambda self: self._parse_journal(
 520            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 521        ),
 522        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 523        "CHARACTER SET": lambda self: self._parse_character_set(),
 524        "CHECKSUM": lambda self: self._parse_checksum(),
 525        "CLUSTER BY": lambda self: self.expression(
 526            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 527        ),
 528        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 529        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 530        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 531            default=self._prev.text.upper() == "DEFAULT"
 532        ),
 533        "DEFINER": lambda self: self._parse_definer(),
 534        "DETERMINISTIC": lambda self: self.expression(
 535            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 536        ),
 537        "DISTKEY": lambda self: self._parse_distkey(),
 538        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 539        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 540        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 541        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 542        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 543        "FREESPACE": lambda self: self._parse_freespace(),
 544        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 545        "IMMUTABLE": lambda self: self.expression(
 546            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 547        ),
 548        "JOURNAL": lambda self: self._parse_journal(
 549            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 550        ),
 551        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 552        "LIKE": lambda self: self._parse_create_like(),
 553        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 554        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 555        "LOCK": lambda self: self._parse_locking(),
 556        "LOCKING": lambda self: self._parse_locking(),
 557        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 558        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 559        "MAX": lambda self: self._parse_datablocksize(),
 560        "MAXIMUM": lambda self: self._parse_datablocksize(),
 561        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 562            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 563        ),
 564        "MIN": lambda self: self._parse_datablocksize(),
 565        "MINIMUM": lambda self: self._parse_datablocksize(),
 566        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 567        "NO": lambda self: self._parse_noprimaryindex(),
 568        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 569        "ON": lambda self: self._parse_oncommit(),
 570        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 571        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 572        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 573        "RETURNS": lambda self: self._parse_returns(),
 574        "ROW": lambda self: self._parse_row(),
 575        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 576        "SORTKEY": lambda self: self._parse_sortkey(),
 577        "STABLE": lambda self: self.expression(
 578            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
 579        ),
 580        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 581        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 582        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 583        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 584        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 585        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 586        "VOLATILE": lambda self: self.expression(
 587            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
 588        ),
 589        "WITH": lambda self: self._parse_with_property(),
 590    }
 591
 592    CONSTRAINT_PARSERS = {
 593        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 594        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 595        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 596        "CHARACTER SET": lambda self: self.expression(
 597            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 598        ),
 599        "CHECK": lambda self: self.expression(
 600            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 601        ),
 602        "COLLATE": lambda self: self.expression(
 603            exp.CollateColumnConstraint, this=self._parse_var()
 604        ),
 605        "COMMENT": lambda self: self.expression(
 606            exp.CommentColumnConstraint, this=self._parse_string()
 607        ),
 608        "COMPRESS": lambda self: self._parse_compress(),
 609        "DEFAULT": lambda self: self.expression(
 610            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 611        ),
 612        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 613        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 614        "FORMAT": lambda self: self.expression(
 615            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 616        ),
 617        "GENERATED": lambda self: self._parse_generated_as_identity(),
 618        "IDENTITY": lambda self: self._parse_auto_increment(),
 619        "INLINE": lambda self: self._parse_inline(),
 620        "LIKE": lambda self: self._parse_create_like(),
 621        "NOT": lambda self: self._parse_not_constraint(),
 622        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 623        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 624        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 625        "TITLE": lambda self: self.expression(
 626            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 627        ),
 628        "UNIQUE": lambda self: self._parse_unique(),
 629        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 630    }
 631
 632    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 633
 634    NO_PAREN_FUNCTION_PARSERS = {
 635        TokenType.CASE: lambda self: self._parse_case(),
 636        TokenType.IF: lambda self: self._parse_if(),
 637        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 638    }
 639
 640    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 641        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 642        "TRY_CONVERT": lambda self: self._parse_convert(False),
 643        "EXTRACT": lambda self: self._parse_extract(),
 644        "POSITION": lambda self: self._parse_position(),
 645        "SUBSTRING": lambda self: self._parse_substring(),
 646        "TRIM": lambda self: self._parse_trim(),
 647        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 648        "TRY_CAST": lambda self: self._parse_cast(False),
 649        "STRING_AGG": lambda self: self._parse_string_agg(),
 650    }
 651
 652    QUERY_MODIFIER_PARSERS = {
 653        "match": lambda self: self._parse_match_recognize(),
 654        "where": lambda self: self._parse_where(),
 655        "group": lambda self: self._parse_group(),
 656        "having": lambda self: self._parse_having(),
 657        "qualify": lambda self: self._parse_qualify(),
 658        "windows": lambda self: self._parse_window_clause(),
 659        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
 660        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 661        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 662        "order": lambda self: self._parse_order(),
 663        "limit": lambda self: self._parse_limit(),
 664        "offset": lambda self: self._parse_offset(),
 665        "lock": lambda self: self._parse_lock(),
 666    }
 667
 668    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 669    SET_PARSERS: t.Dict[str, t.Callable] = {}
 670
 671    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 672
 673    CREATABLES = {
 674        TokenType.COLUMN,
 675        TokenType.FUNCTION,
 676        TokenType.INDEX,
 677        TokenType.PROCEDURE,
 678        TokenType.SCHEMA,
 679        TokenType.TABLE,
 680        TokenType.VIEW,
 681    }
 682
 683    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 684
 685    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 686
 687    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 688
 689    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 690
 691    STRICT_CAST = True
 692
 693    __slots__ = (
 694        "error_level",
 695        "error_message_context",
 696        "sql",
 697        "errors",
 698        "index_offset",
 699        "unnest_column_only",
 700        "alias_post_tablesample",
 701        "max_errors",
 702        "null_ordering",
 703        "_tokens",
 704        "_index",
 705        "_curr",
 706        "_next",
 707        "_prev",
 708        "_prev_comments",
 709        "_show_trie",
 710        "_set_trie",
 711    )
 712
 713    def __init__(
 714        self,
 715        error_level: t.Optional[ErrorLevel] = None,
 716        error_message_context: int = 100,
 717        index_offset: int = 0,
 718        unnest_column_only: bool = False,
 719        alias_post_tablesample: bool = False,
 720        max_errors: int = 3,
 721        null_ordering: t.Optional[str] = None,
 722    ):
 723        self.error_level = error_level or ErrorLevel.IMMEDIATE
 724        self.error_message_context = error_message_context
 725        self.index_offset = index_offset
 726        self.unnest_column_only = unnest_column_only
 727        self.alias_post_tablesample = alias_post_tablesample
 728        self.max_errors = max_errors
 729        self.null_ordering = null_ordering
 730        self.reset()
 731
 732    def reset(self):
 733        self.sql = ""
 734        self.errors = []
 735        self._tokens = []
 736        self._index = 0
 737        self._curr = None
 738        self._next = None
 739        self._prev = None
 740        self._prev_comments = None
 741
 742    def parse(
 743        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 744    ) -> t.List[t.Optional[exp.Expression]]:
 745        """
 746        Parses a list of tokens and returns a list of syntax trees, one tree
 747        per parsed SQL statement.
 748
 749        Args:
 750            raw_tokens: the list of tokens.
 751            sql: the original SQL string, used to produce helpful debug messages.
 752
 753        Returns:
 754            The list of syntax trees.
 755        """
 756        return self._parse(
 757            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 758        )
 759
 760    def parse_into(
 761        self,
 762        expression_types: exp.IntoType,
 763        raw_tokens: t.List[Token],
 764        sql: t.Optional[str] = None,
 765    ) -> t.List[t.Optional[exp.Expression]]:
 766        """
 767        Parses a list of tokens into a given Expression type. If a collection of Expression
 768        types is given instead, this method will try to parse the token list into each one
 769        of them, stopping at the first for which the parsing succeeds.
 770
 771        Args:
 772            expression_types: the expression type(s) to try and parse the token list into.
 773            raw_tokens: the list of tokens.
 774            sql: the original SQL string, used to produce helpful debug messages.
 775
 776        Returns:
 777            The target Expression.
 778        """
 779        errors = []
 780        for expression_type in ensure_collection(expression_types):
 781            parser = self.EXPRESSION_PARSERS.get(expression_type)
 782            if not parser:
 783                raise TypeError(f"No parser registered for {expression_type}")
 784            try:
 785                return self._parse(parser, raw_tokens, sql)
 786            except ParseError as e:
 787                e.errors[0]["into_expression"] = expression_type
 788                errors.append(e)
 789        raise ParseError(
 790            f"Failed to parse into {expression_types}",
 791            errors=merge_errors(errors),
 792        ) from errors[-1]
 793
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits `raw_tokens` on semicolons and applies `parse_method` to each chunk.

        Args:
            parse_method: the parser entry point to run over every statement chunk.
            raw_tokens: the tokens to parse.
            sql: the original SQL string, kept around for error reporting.

        Returns:
            One (possibly None) expression per semicolon-separated statement.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the tokens into one chunk per statement, dropping the semicolons
        # themselves; a trailing semicolon does not open a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position the cursor on the first token of the chunk

            expressions.append(parse_method(self))

            # Tokens left over mean the chunk was not fully consumed by the parser.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 827
 828    def check_errors(self) -> None:
 829        """
 830        Logs or raises any found errors, depending on the chosen error level setting.
 831        """
 832        if self.error_level == ErrorLevel.WARN:
 833            for error in self.errors:
 834                logger.error(str(error))
 835        elif self.error_level == ErrorLevel.RAISE and self.errors:
 836            raise ParseError(
 837                concat_messages(self.errors, self.max_errors),
 838                errors=merge_errors(self.errors),
 839            )
 840
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token to attribute the error to; defaults to the current
                (or previous) token, falling back to an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        # Locate the token in the raw SQL so we can build a highlighted snippet
        # of surrounding context for the message.
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escape codes underline the offending fragment
        # when the message is printed to a terminal.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 868
 869    def expression(
 870        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 871    ) -> exp.Expression:
 872        """
 873        Creates a new, validated Expression.
 874
 875        Args:
 876            exp_class: the expression class to instantiate.
 877            comments: an optional list of comments to attach to the expression.
 878            kwargs: the arguments to set for the expression along with their respective values.
 879
 880        Returns:
 881            The target expression.
 882        """
 883        instance = exp_class(**kwargs)
 884        if self._prev_comments:
 885            instance.comments = self._prev_comments
 886            self._prev_comments = None
 887        if comments:
 888            instance.comments = comments
 889        self.validate_expression(instance)
 890        return instance
 891
 892    def validate_expression(
 893        self, expression: exp.Expression, args: t.Optional[t.List] = None
 894    ) -> None:
 895        """
 896        Validates an already instantiated expression, making sure that all its mandatory arguments
 897        are set.
 898
 899        Args:
 900            expression: the expression to validate.
 901            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 902        """
 903        if self.error_level == ErrorLevel.IGNORE:
 904            return
 905
 906        for error_message in expression.error_messages(args):
 907            self.raise_error(error_message)
 908
 909    def _find_sql(self, start: Token, end: Token) -> str:
 910        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 911
    def _find_token(self, token: Token) -> int:
        """Returns the character offset of `token` within self.sql, derived from its line/col."""
        line = 1
        col = 1
        index = 0

        # Walk the SQL string one character at a time until the token's
        # line/column position is reached; line breaks reset the column counter.
        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index
 926
 927    def _advance(self, times: int = 1) -> None:
 928        self._index += times
 929        self._curr = seq_get(self._tokens, self._index)
 930        self._next = seq_get(self._tokens, self._index + 1)
 931        if self._index > 0:
 932            self._prev = self._tokens[self._index - 1]
 933            self._prev_comments = self._prev.comments
 934        else:
 935            self._prev = None
 936            self._prev_comments = None
 937
 938    def _retreat(self, index: int) -> None:
 939        self._advance(index - self._index)
 940
 941    def _parse_command(self) -> exp.Expression:
 942        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 943
 944    def _parse_statement(self) -> t.Optional[exp.Expression]:
 945        if self._curr is None:
 946            return None
 947
 948        if self._match_set(self.STATEMENT_PARSERS):
 949            return self.STATEMENT_PARSERS[self._prev.token_type](self)
 950
 951        if self._match_set(Tokenizer.COMMANDS):
 952            return self._parse_command()
 953
 954        expression = self._parse_expression()
 955        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 956
 957        self._parse_query_modifiers(expression)
 958        return expression
 959
 960    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
 961        start = self._prev
 962        temporary = self._match(TokenType.TEMPORARY)
 963        materialized = self._match(TokenType.MATERIALIZED)
 964        kind = self._match_set(self.CREATABLES) and self._prev.text
 965        if not kind:
 966            if default_kind:
 967                kind = default_kind
 968            else:
 969                return self._parse_as_command(start)
 970
 971        return self.expression(
 972            exp.Drop,
 973            exists=self._parse_exists(),
 974            this=self._parse_table(schema=True),
 975            kind=kind,
 976            temporary=temporary,
 977            materialized=materialized,
 978            cascade=self._match(TokenType.CASCADE),
 979        )
 980
 981    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
 982        return (
 983            self._match(TokenType.IF)
 984            and (not not_ or self._match(TokenType.NOT))
 985            and self._match(TokenType.EXISTS)
 986        )
 987
 988    def _parse_create(self) -> t.Optional[exp.Expression]:
 989        start = self._prev
 990        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
 991            TokenType.OR, TokenType.REPLACE
 992        )
 993        unique = self._match(TokenType.UNIQUE)
 994
 995        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
 996            self._match(TokenType.TABLE)
 997
 998        properties = None
 999        create_token = self._match_set(self.CREATABLES) and self._prev
1000
1001        if not create_token:
1002            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1003            create_token = self._match_set(self.CREATABLES) and self._prev
1004
1005            if not properties or not create_token:
1006                return self._parse_as_command(start)
1007
1008        exists = self._parse_exists(not_=True)
1009        this = None
1010        expression = None
1011        indexes = None
1012        no_schema_binding = None
1013        begin = None
1014
1015        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1016            this = self._parse_user_defined_function(kind=create_token.token_type)
1017            temp_properties = self._parse_properties()
1018            if properties and temp_properties:
1019                properties.expressions.extend(temp_properties.expressions)
1020            elif temp_properties:
1021                properties = temp_properties
1022
1023            self._match(TokenType.ALIAS)
1024            begin = self._match(TokenType.BEGIN)
1025            return_ = self._match_text_seq("RETURN")
1026            expression = self._parse_statement()
1027
1028            if return_:
1029                expression = self.expression(exp.Return, this=expression)
1030        elif create_token.token_type == TokenType.INDEX:
1031            this = self._parse_index()
1032        elif create_token.token_type in (
1033            TokenType.TABLE,
1034            TokenType.VIEW,
1035            TokenType.SCHEMA,
1036        ):
1037            table_parts = self._parse_table_parts(schema=True)
1038
1039            # exp.Properties.Location.POST_NAME
1040            if self._match(TokenType.COMMA):
1041                temp_properties = self._parse_properties(before=True)
1042                if properties and temp_properties:
1043                    properties.expressions.extend(temp_properties.expressions)
1044                elif temp_properties:
1045                    properties = temp_properties
1046
1047            this = self._parse_schema(this=table_parts)
1048
1049            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1050            temp_properties = self._parse_properties()
1051            if properties and temp_properties:
1052                properties.expressions.extend(temp_properties.expressions)
1053            elif temp_properties:
1054                properties = temp_properties
1055
1056            self._match(TokenType.ALIAS)
1057
1058            # exp.Properties.Location.POST_ALIAS
1059            if not (
1060                self._match(TokenType.SELECT, advance=False)
1061                or self._match(TokenType.WITH, advance=False)
1062                or self._match(TokenType.L_PAREN, advance=False)
1063            ):
1064                temp_properties = self._parse_properties()
1065                if properties and temp_properties:
1066                    properties.expressions.extend(temp_properties.expressions)
1067                elif temp_properties:
1068                    properties = temp_properties
1069
1070            expression = self._parse_ddl_select()
1071
1072            if create_token.token_type == TokenType.TABLE:
1073                # exp.Properties.Location.POST_EXPRESSION
1074                temp_properties = self._parse_properties()
1075                if properties and temp_properties:
1076                    properties.expressions.extend(temp_properties.expressions)
1077                elif temp_properties:
1078                    properties = temp_properties
1079
1080                indexes = []
1081                while True:
1082                    index = self._parse_create_table_index()
1083
1084                    # exp.Properties.Location.POST_INDEX
1085                    if self._match(TokenType.PARTITION_BY, advance=False):
1086                        temp_properties = self._parse_properties()
1087                        if properties and temp_properties:
1088                            properties.expressions.extend(temp_properties.expressions)
1089                        elif temp_properties:
1090                            properties = temp_properties
1091
1092                    if not index:
1093                        break
1094                    else:
1095                        indexes.append(index)
1096            elif create_token.token_type == TokenType.VIEW:
1097                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1098                    no_schema_binding = True
1099
1100        return self.expression(
1101            exp.Create,
1102            this=this,
1103            kind=create_token.text,
1104            unique=unique,
1105            expression=expression,
1106            exists=exists,
1107            properties=properties,
1108            replace=replace,
1109            indexes=indexes,
1110            no_schema_binding=no_schema_binding,
1111            begin=begin,
1112        )
1113
1114    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1115        self._match(TokenType.COMMA)
1116
1117        # parsers look to _prev for no/dual/default, so need to consume first
1118        self._match_text_seq("NO")
1119        self._match_text_seq("DUAL")
1120        self._match_text_seq("DEFAULT")
1121
1122        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1123            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1124
1125        return None
1126
1127    def _parse_property(self) -> t.Optional[exp.Expression]:
1128        if self._match_texts(self.PROPERTY_PARSERS):
1129            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1130
1131        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1132            return self._parse_character_set(default=True)
1133
1134        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1135            return self._parse_sortkey(compound=True)
1136
1137        if self._match_text_seq("SQL", "SECURITY"):
1138            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1139
1140        assignment = self._match_pair(
1141            TokenType.VAR, TokenType.EQ, advance=False
1142        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1143
1144        if assignment:
1145            key = self._parse_var_or_string()
1146            self._match(TokenType.EQ)
1147            return self.expression(exp.Property, this=key, value=self._parse_column())
1148
1149        return None
1150
1151    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1152        self._match(TokenType.EQ)
1153        self._match(TokenType.ALIAS)
1154        return self.expression(
1155            exp_class,
1156            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1157        )
1158
1159    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1160        properties = []
1161
1162        while True:
1163            if before:
1164                identified_property = self._parse_property_before()
1165            else:
1166                identified_property = self._parse_property()
1167
1168            if not identified_property:
1169                break
1170            for p in ensure_collection(identified_property):
1171                properties.append(p)
1172
1173        if properties:
1174            return self.expression(exp.Properties, expressions=properties)
1175
1176        return None
1177
1178    def _parse_fallback(self, no=False) -> exp.Expression:
1179        self._match_text_seq("FALLBACK")
1180        return self.expression(
1181            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1182        )
1183
1184    def _parse_with_property(
1185        self,
1186    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1187        self._match(TokenType.WITH)
1188        if self._match(TokenType.L_PAREN, advance=False):
1189            return self._parse_wrapped_csv(self._parse_property)
1190
1191        if self._match_text_seq("JOURNAL"):
1192            return self._parse_withjournaltable()
1193
1194        if self._match_text_seq("DATA"):
1195            return self._parse_withdata(no=False)
1196        elif self._match_text_seq("NO", "DATA"):
1197            return self._parse_withdata(no=True)
1198
1199        if not self._next:
1200            return None
1201
1202        return self._parse_withisolatedloading()
1203
1204    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1205    def _parse_definer(self) -> t.Optional[exp.Expression]:
1206        self._match(TokenType.EQ)
1207
1208        user = self._parse_id_var()
1209        self._match(TokenType.PARAMETER)
1210        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1211
1212        if not user or not host:
1213            return None
1214
1215        return exp.DefinerProperty(this=f"{user}@{host}")
1216
1217    def _parse_withjournaltable(self) -> exp.Expression:
1218        self._match(TokenType.TABLE)
1219        self._match(TokenType.EQ)
1220        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1221
1222    def _parse_log(self, no=False) -> exp.Expression:
1223        self._match_text_seq("LOG")
1224        return self.expression(exp.LogProperty, no=no)
1225
1226    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1227        before = self._match_text_seq("BEFORE")
1228        self._match_text_seq("JOURNAL")
1229        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1230
1231    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1232        self._match_text_seq("NOT")
1233        self._match_text_seq("LOCAL")
1234        self._match_text_seq("AFTER", "JOURNAL")
1235        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1236
1237    def _parse_checksum(self) -> exp.Expression:
1238        self._match_text_seq("CHECKSUM")
1239        self._match(TokenType.EQ)
1240
1241        on = None
1242        if self._match(TokenType.ON):
1243            on = True
1244        elif self._match_text_seq("OFF"):
1245            on = False
1246        default = self._match(TokenType.DEFAULT)
1247
1248        return self.expression(
1249            exp.ChecksumProperty,
1250            on=on,
1251            default=default,
1252        )
1253
1254    def _parse_freespace(self) -> exp.Expression:
1255        self._match_text_seq("FREESPACE")
1256        self._match(TokenType.EQ)
1257        return self.expression(
1258            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1259        )
1260
1261    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1262        self._match_text_seq("MERGEBLOCKRATIO")
1263        if self._match(TokenType.EQ):
1264            return self.expression(
1265                exp.MergeBlockRatioProperty,
1266                this=self._parse_number(),
1267                percent=self._match(TokenType.PERCENT),
1268            )
1269        else:
1270            return self.expression(
1271                exp.MergeBlockRatioProperty,
1272                no=no,
1273                default=default,
1274            )
1275
1276    def _parse_datablocksize(self, default=None) -> exp.Expression:
1277        if default:
1278            self._match_text_seq("DATABLOCKSIZE")
1279            return self.expression(exp.DataBlocksizeProperty, default=True)
1280        elif self._match_texts(("MIN", "MINIMUM")):
1281            self._match_text_seq("DATABLOCKSIZE")
1282            return self.expression(exp.DataBlocksizeProperty, min=True)
1283        elif self._match_texts(("MAX", "MAXIMUM")):
1284            self._match_text_seq("DATABLOCKSIZE")
1285            return self.expression(exp.DataBlocksizeProperty, min=False)
1286
1287        self._match_text_seq("DATABLOCKSIZE")
1288        self._match(TokenType.EQ)
1289        size = self._parse_number()
1290        units = None
1291        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1292            units = self._prev.text
1293        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1294
1295    def _parse_blockcompression(self) -> exp.Expression:
1296        self._match_text_seq("BLOCKCOMPRESSION")
1297        self._match(TokenType.EQ)
1298        always = self._match_text_seq("ALWAYS")
1299        manual = self._match_text_seq("MANUAL")
1300        never = self._match_text_seq("NEVER")
1301        default = self._match_text_seq("DEFAULT")
1302        autotemp = None
1303        if self._match_text_seq("AUTOTEMP"):
1304            autotemp = self._parse_schema()
1305
1306        return self.expression(
1307            exp.BlockCompressionProperty,
1308            always=always,
1309            manual=manual,
1310            never=never,
1311            default=default,
1312            autotemp=autotemp,
1313        )
1314
1315    def _parse_withisolatedloading(self) -> exp.Expression:
1316        no = self._match_text_seq("NO")
1317        concurrent = self._match_text_seq("CONCURRENT")
1318        self._match_text_seq("ISOLATED", "LOADING")
1319        for_all = self._match_text_seq("FOR", "ALL")
1320        for_insert = self._match_text_seq("FOR", "INSERT")
1321        for_none = self._match_text_seq("FOR", "NONE")
1322        return self.expression(
1323            exp.IsolatedLoadingProperty,
1324            no=no,
1325            concurrent=concurrent,
1326            for_all=for_all,
1327            for_insert=for_insert,
1328            for_none=for_none,
1329        )
1330
1331    def _parse_locking(self) -> exp.Expression:
1332        if self._match(TokenType.TABLE):
1333            kind = "TABLE"
1334        elif self._match(TokenType.VIEW):
1335            kind = "VIEW"
1336        elif self._match(TokenType.ROW):
1337            kind = "ROW"
1338        elif self._match_text_seq("DATABASE"):
1339            kind = "DATABASE"
1340        else:
1341            kind = None
1342
1343        if kind in ("DATABASE", "TABLE", "VIEW"):
1344            this = self._parse_table_parts()
1345        else:
1346            this = None
1347
1348        if self._match(TokenType.FOR):
1349            for_or_in = "FOR"
1350        elif self._match(TokenType.IN):
1351            for_or_in = "IN"
1352        else:
1353            for_or_in = None
1354
1355        if self._match_text_seq("ACCESS"):
1356            lock_type = "ACCESS"
1357        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1358            lock_type = "EXCLUSIVE"
1359        elif self._match_text_seq("SHARE"):
1360            lock_type = "SHARE"
1361        elif self._match_text_seq("READ"):
1362            lock_type = "READ"
1363        elif self._match_text_seq("WRITE"):
1364            lock_type = "WRITE"
1365        elif self._match_text_seq("CHECKSUM"):
1366            lock_type = "CHECKSUM"
1367        else:
1368            lock_type = None
1369
1370        override = self._match_text_seq("OVERRIDE")
1371
1372        return self.expression(
1373            exp.LockingProperty,
1374            this=this,
1375            kind=kind,
1376            for_or_in=for_or_in,
1377            lock_type=lock_type,
1378            override=override,
1379        )
1380
1381    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1382        if self._match(TokenType.PARTITION_BY):
1383            return self._parse_csv(self._parse_conjunction)
1384        return []
1385
1386    def _parse_partitioned_by(self) -> exp.Expression:
1387        self._match(TokenType.EQ)
1388        return self.expression(
1389            exp.PartitionedByProperty,
1390            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1391        )
1392
1393    def _parse_withdata(self, no=False) -> exp.Expression:
1394        if self._match_text_seq("AND", "STATISTICS"):
1395            statistics = True
1396        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1397            statistics = False
1398        else:
1399            statistics = None
1400
1401        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1402
1403    def _parse_noprimaryindex(self) -> exp.Expression:
1404        self._match_text_seq("PRIMARY", "INDEX")
1405        return exp.NoPrimaryIndexProperty()
1406
1407    def _parse_oncommit(self) -> exp.Expression:
1408        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1409        return exp.OnCommitProperty()
1410
1411    def _parse_distkey(self) -> exp.Expression:
1412        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1413
1414    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1415        table = self._parse_table(schema=True)
1416        options = []
1417        while self._match_texts(("INCLUDING", "EXCLUDING")):
1418            this = self._prev.text.upper()
1419            id_var = self._parse_id_var()
1420
1421            if not id_var:
1422                return None
1423
1424            options.append(
1425                self.expression(
1426                    exp.Property,
1427                    this=this,
1428                    value=exp.Var(this=id_var.this.upper()),
1429                )
1430            )
1431        return self.expression(exp.LikeProperty, this=table, expressions=options)
1432
1433    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1434        return self.expression(
1435            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1436        )
1437
1438    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1439        self._match(TokenType.EQ)
1440        return self.expression(
1441            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1442        )
1443
1444    def _parse_returns(self) -> exp.Expression:
1445        value: t.Optional[exp.Expression]
1446        is_table = self._match(TokenType.TABLE)
1447
1448        if is_table:
1449            if self._match(TokenType.LT):
1450                value = self.expression(
1451                    exp.Schema,
1452                    this="TABLE",
1453                    expressions=self._parse_csv(self._parse_struct_kwargs),
1454                )
1455                if not self._match(TokenType.GT):
1456                    self.raise_error("Expecting >")
1457            else:
1458                value = self._parse_schema(exp.Var(this="TABLE"))
1459        else:
1460            value = self._parse_types()
1461
1462        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1463
1464    def _parse_temporary(self, global_=False) -> exp.Expression:
1465        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1466        return self.expression(exp.TemporaryProperty, global_=global_)
1467
1468    def _parse_describe(self) -> exp.Expression:
1469        kind = self._match_set(self.CREATABLES) and self._prev.text
1470        this = self._parse_table()
1471
1472        return self.expression(exp.Describe, this=this, kind=kind)
1473
1474    def _parse_insert(self) -> exp.Expression:
1475        overwrite = self._match(TokenType.OVERWRITE)
1476        local = self._match(TokenType.LOCAL)
1477
1478        this: t.Optional[exp.Expression]
1479
1480        alternative = None
1481        if self._match_text_seq("DIRECTORY"):
1482            this = self.expression(
1483                exp.Directory,
1484                this=self._parse_var_or_string(),
1485                local=local,
1486                row_format=self._parse_row_format(match_row=True),
1487            )
1488        else:
1489            if self._match(TokenType.OR):
1490                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1491
1492            self._match(TokenType.INTO)
1493            self._match(TokenType.TABLE)
1494            this = self._parse_table(schema=True)
1495
1496        return self.expression(
1497            exp.Insert,
1498            this=this,
1499            exists=self._parse_exists(),
1500            partition=self._parse_partition(),
1501            expression=self._parse_ddl_select(),
1502            overwrite=overwrite,
1503            alternative=alternative,
1504        )
1505
1506    def _parse_row(self) -> t.Optional[exp.Expression]:
1507        if not self._match(TokenType.FORMAT):
1508            return None
1509        return self._parse_row_format()
1510
1511    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1512        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1513            return None
1514
1515        if self._match_text_seq("SERDE"):
1516            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1517
1518        self._match_text_seq("DELIMITED")
1519
1520        kwargs = {}
1521
1522        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1523            kwargs["fields"] = self._parse_string()
1524            if self._match_text_seq("ESCAPED", "BY"):
1525                kwargs["escaped"] = self._parse_string()
1526        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1527            kwargs["collection_items"] = self._parse_string()
1528        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1529            kwargs["map_keys"] = self._parse_string()
1530        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1531            kwargs["lines"] = self._parse_string()
1532        if self._match_text_seq("NULL", "DEFINED", "AS"):
1533            kwargs["null"] = self._parse_string()
1534
1535        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1536
1537    def _parse_load_data(self) -> exp.Expression:
1538        local = self._match(TokenType.LOCAL)
1539        self._match_text_seq("INPATH")
1540        inpath = self._parse_string()
1541        overwrite = self._match(TokenType.OVERWRITE)
1542        self._match_pair(TokenType.INTO, TokenType.TABLE)
1543
1544        return self.expression(
1545            exp.LoadData,
1546            this=self._parse_table(schema=True),
1547            local=local,
1548            overwrite=overwrite,
1549            inpath=inpath,
1550            partition=self._parse_partition(),
1551            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1552            serde=self._match_text_seq("SERDE") and self._parse_string(),
1553        )
1554
1555    def _parse_delete(self) -> exp.Expression:
1556        self._match(TokenType.FROM)
1557
1558        return self.expression(
1559            exp.Delete,
1560            this=self._parse_table(schema=True),
1561            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1562            where=self._parse_where(),
1563        )
1564
1565    def _parse_update(self) -> exp.Expression:
1566        return self.expression(
1567            exp.Update,
1568            **{  # type: ignore
1569                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1570                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1571                "from": self._parse_from(),
1572                "where": self._parse_where(),
1573            },
1574        )
1575
1576    def _parse_uncache(self) -> exp.Expression:
1577        if not self._match(TokenType.TABLE):
1578            self.raise_error("Expecting TABLE after UNCACHE")
1579
1580        return self.expression(
1581            exp.Uncache,
1582            exists=self._parse_exists(),
1583            this=self._parse_table(schema=True),
1584        )
1585
1586    def _parse_cache(self) -> exp.Expression:
1587        lazy = self._match(TokenType.LAZY)
1588        self._match(TokenType.TABLE)
1589        table = self._parse_table(schema=True)
1590        options = []
1591
1592        if self._match(TokenType.OPTIONS):
1593            self._match_l_paren()
1594            k = self._parse_string()
1595            self._match(TokenType.EQ)
1596            v = self._parse_string()
1597            options = [k, v]
1598            self._match_r_paren()
1599
1600        self._match(TokenType.ALIAS)
1601        return self.expression(
1602            exp.Cache,
1603            this=table,
1604            lazy=lazy,
1605            options=options,
1606            expression=self._parse_select(nested=True),
1607        )
1608
1609    def _parse_partition(self) -> t.Optional[exp.Expression]:
1610        if not self._match(TokenType.PARTITION):
1611            return None
1612
1613        return self.expression(
1614            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1615        )
1616
1617    def _parse_value(self) -> exp.Expression:
1618        if self._match(TokenType.L_PAREN):
1619            expressions = self._parse_csv(self._parse_conjunction)
1620            self._match_r_paren()
1621            return self.expression(exp.Tuple, expressions=expressions)
1622
1623        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1624        # Source: https://prestodb.io/docs/current/sql/values.html
1625        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1626
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, returning None when nothing matches.

        Handles, in order: a CTE-prefixed statement, a plain SELECT, a
        parenthesized subquery/table (when `nested`/`table` is set), and VALUES.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                # DISTINCT ON (...) is supported via the optional `on` arg.
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1700
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause and its CTE list, or None when absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; a repeated WITH is also
            # tolerated. Note _match consumes the token on success, so the
            # short-circuit order here matters: if COMMA matched, WITH was
            # not probed above and is consumed by the else branch instead.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1717
1718    def _parse_cte(self) -> exp.Expression:
1719        alias = self._parse_table_alias()
1720        if not alias or not alias.this:
1721            self.raise_error("Expected CTE to have alias")
1722
1723        self._match(TokenType.ALIAS)
1724
1725        return self.expression(
1726            exp.CTE,
1727            this=self._parse_wrapped(self._parse_statement),
1728            alias=alias,
1729        )
1730
1731    def _parse_table_alias(
1732        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1733    ) -> t.Optional[exp.Expression]:
1734        any_token = self._match(TokenType.ALIAS)
1735        alias = self._parse_id_var(
1736            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1737        )
1738        index = self._index
1739
1740        if self._match(TokenType.L_PAREN):
1741            columns = self._parse_csv(self._parse_function_parameter)
1742            self._match_r_paren() if columns else self._retreat(index)
1743        else:
1744            columns = None
1745
1746        if not alias and not columns:
1747            return None
1748
1749        return self.expression(exp.TableAlias, this=alias, columns=columns)
1750
1751    def _parse_subquery(
1752        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1753    ) -> exp.Expression:
1754        return self.expression(
1755            exp.Subquery,
1756            this=this,
1757            pivots=self._parse_pivots(),
1758            alias=self._parse_table_alias() if parse_alias else None,
1759        )
1760
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, comma-joined tables, and
        each clause registered in QUERY_MODIFIER_PARSERS) to `this` in place.

        Does nothing when `this` is not a modifiable expression type.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Comma-joins are only consumed for non-table expressions.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # A comma appends another table to the existing FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # Try each registered modifier parser in declaration order.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1785
1786    def _parse_hint(self) -> t.Optional[exp.Expression]:
1787        if self._match(TokenType.HINT):
1788            hints = self._parse_csv(self._parse_function)
1789            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1790                self.raise_error("Expected */ after HINT")
1791            return self.expression(exp.Hint, expressions=hints)
1792
1793        return None
1794
1795    def _parse_into(self) -> t.Optional[exp.Expression]:
1796        if not self._match(TokenType.INTO):
1797            return None
1798
1799        temp = self._match(TokenType.TEMPORARY)
1800        unlogged = self._match(TokenType.UNLOGGED)
1801        self._match(TokenType.TABLE)
1802
1803        return self.expression(
1804            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1805        )
1806
1807    def _parse_from(self) -> t.Optional[exp.Expression]:
1808        if not self._match(TokenType.FROM):
1809            return None
1810
1811        return self.expression(
1812            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1813        )
1814
1815    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1816        if not self._match(TokenType.MATCH_RECOGNIZE):
1817            return None
1818        self._match_l_paren()
1819
1820        partition = self._parse_partition_by()
1821        order = self._parse_order()
1822        measures = (
1823            self._parse_alias(self._parse_conjunction())
1824            if self._match_text_seq("MEASURES")
1825            else None
1826        )
1827
1828        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1829            rows = exp.Var(this="ONE ROW PER MATCH")
1830        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1831            text = "ALL ROWS PER MATCH"
1832            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1833                text += f" SHOW EMPTY MATCHES"
1834            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1835                text += f" OMIT EMPTY MATCHES"
1836            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1837                text += f" WITH UNMATCHED ROWS"
1838            rows = exp.Var(this=text)
1839        else:
1840            rows = None
1841
1842        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1843            text = "AFTER MATCH SKIP"
1844            if self._match_text_seq("PAST", "LAST", "ROW"):
1845                text += f" PAST LAST ROW"
1846            elif self._match_text_seq("TO", "NEXT", "ROW"):
1847                text += f" TO NEXT ROW"
1848            elif self._match_text_seq("TO", "FIRST"):
1849                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1850            elif self._match_text_seq("TO", "LAST"):
1851                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1852            after = exp.Var(this=text)
1853        else:
1854            after = None
1855
1856        if self._match_text_seq("PATTERN"):
1857            self._match_l_paren()
1858
1859            if not self._curr:
1860                self.raise_error("Expecting )", self._curr)
1861
1862            paren = 1
1863            start = self._curr
1864
1865            while self._curr and paren > 0:
1866                if self._curr.token_type == TokenType.L_PAREN:
1867                    paren += 1
1868                if self._curr.token_type == TokenType.R_PAREN:
1869                    paren -= 1
1870                end = self._prev
1871                self._advance()
1872            if paren > 0:
1873                self.raise_error("Expecting )", self._curr)
1874            pattern = exp.Var(this=self._find_sql(start, end))
1875        else:
1876            pattern = None
1877
1878        define = (
1879            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1880        )
1881        self._match_r_paren()
1882
1883        return self.expression(
1884            exp.MatchRecognize,
1885            partition_by=partition,
1886            order=order,
1887            measures=measures,
1888            rows=rows,
1889            after=after,
1890            pattern=pattern,
1891            define=define,
1892        )
1893
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY, or None when absent.

        APPLY forms are returned wrapped in a Join (OUTER APPLY maps to a
        LEFT-side join); plain LATERAL is returned as a Lateral expression.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Fall back to a (possibly dotted) function call or identifier,
            # e.g. LATERAL flatten(...) or LATERAL schema.func(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW: `table AS col1, col2, ...` aliasing style.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1938
1939    def _parse_join_side_and_kind(
1940        self,
1941    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1942        return (
1943            self._match(TokenType.NATURAL) and self._prev,
1944            self._match_set(self.JOIN_SIDES) and self._prev,
1945            self._match_set(self.JOIN_KINDS) and self._prev,
1946        )
1947
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, or None when the next tokens aren't a join."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        # Only set the optional args that were actually present.
        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        # ON and USING are mutually exclusive join conditions.
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
1971
1972    def _parse_index(self) -> exp.Expression:
1973        index = self._parse_id_var()
1974        self._match(TokenType.ON)
1975        self._match(TokenType.TABLE)  # hive
1976
1977        return self.expression(
1978            exp.Index,
1979            this=index,
1980            table=self.expression(exp.Table, this=self._parse_id_var()),
1981            columns=self._parse_expression(),
1982        )
1983
1984    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
1985        unique = self._match(TokenType.UNIQUE)
1986        primary = self._match_text_seq("PRIMARY")
1987        amp = self._match_text_seq("AMP")
1988        if not self._match(TokenType.INDEX):
1989            return None
1990        index = self._parse_id_var()
1991        columns = None
1992        if self._match(TokenType.L_PAREN, advance=False):
1993            columns = self._parse_wrapped_csv(self._parse_column)
1994        return self.expression(
1995            exp.Index,
1996            this=index,
1997            columns=columns,
1998            unique=unique,
1999            primary=primary,
2000            amp=amp,
2001        )
2002
2003    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2004        catalog = None
2005        db = None
2006        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)
2007
2008        while self._match(TokenType.DOT):
2009            if catalog:
2010                # This allows nesting the table in arbitrarily many dot expressions if needed
2011                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
2012            else:
2013                catalog = db
2014                db = table
2015                table = self._parse_id_var()
2016
2017        if not table:
2018            self.raise_error(f"Expected table name but got {self._curr}")
2019
2020        return self.expression(
2021            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2022        )
2023
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        table reference with optional alias, hints, pivots and TABLESAMPLE.

        Args:
            schema: parse the reference as a schema target (no alias handling).
            alias_tokens: token types allowed to serve as the alias.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias, others after;
        # the flag decides which side of the alias we look on.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (...) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2078
2079    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2080        if not self._match(TokenType.UNNEST):
2081            return None
2082
2083        expressions = self._parse_wrapped_csv(self._parse_column)
2084        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2085        alias = self._parse_table_alias()
2086
2087        if alias and self.unnest_column_only:
2088            if alias.args.get("columns"):
2089                self.raise_error("Unexpected extra column alias in unnest.")
2090            alias.set("columns", [alias.this])
2091            alias.set("this", None)
2092
2093        offset = None
2094        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2095            self._match(TokenType.ALIAS)
2096            offset = self._parse_conjunction()
2097
2098        return self.expression(
2099            exp.Unnest,
2100            expressions=expressions,
2101            ordinality=ordinality,
2102            alias=alias,
2103            offset=offset,
2104        )
2105
2106    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2107        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2108        if not is_derived and not self._match(TokenType.VALUES):
2109            return None
2110
2111        expressions = self._parse_csv(self._parse_value)
2112
2113        if is_derived:
2114            self._match_r_paren()
2115
2116        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2117
2118    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
2119        if not self._match(TokenType.TABLE_SAMPLE):
2120            return None
2121
2122        method = self._parse_var()
2123        bucket_numerator = None
2124        bucket_denominator = None
2125        bucket_field = None
2126        percent = None
2127        rows = None
2128        size = None
2129        seed = None
2130
2131        self._match_l_paren()
2132
2133        if self._match(TokenType.BUCKET):
2134            bucket_numerator = self._parse_number()
2135            self._match(TokenType.OUT_OF)
2136            bucket_denominator = bucket_denominator = self._parse_number()
2137            self._match(TokenType.ON)
2138            bucket_field = self._parse_field()
2139        else:
2140            num = self._parse_number()
2141
2142            if self._match(TokenType.PERCENT):
2143                percent = num
2144            elif self._match(TokenType.ROWS):
2145                rows = num
2146            else:
2147                size = num
2148
2149        self._match_r_paren()
2150
2151        if self._match(TokenType.SEED):
2152            seed = self._parse_wrapped(self._parse_number)
2153
2154        return self.expression(
2155            exp.TableSample,
2156            method=method,
2157            bucket_numerator=bucket_numerator,
2158            bucket_denominator=bucket_denominator,
2159            bucket_field=bucket_field,
2160            percent=percent,
2161            rows=rows,
2162            size=size,
2163            seed=seed,
2164        )
2165
2166    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2167        return list(iter(self._parse_pivot, None))
2168
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause, or None (rewinding) on no match."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword wasn't the start of a pivot clause after all
            # (e.g. PIVOT used as an identifier) -- backtrack fully.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT bodies are aggregate calls, optionally aliased.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot
2209
2210    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2211        if not skip_where_token and not self._match(TokenType.WHERE):
2212            return None
2213
2214        return self.expression(
2215            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2216        )
2217
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause (expressions, GROUPING SETS, [WITH] ROLLUP/CUBE),
        or None when absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE carry no column list (stored as True);
            # bare ROLLUP(...) / CUBE(...) carry a parsed column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once a full pass consumes nothing more.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2249
2250    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2251        if not self._match(TokenType.GROUPING_SETS):
2252            return None
2253
2254        return self._parse_wrapped_csv(self._parse_grouping_set)
2255
2256    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2257        if self._match(TokenType.L_PAREN):
2258            grouping_set = self._parse_csv(self._parse_column)
2259            self._match_r_paren()
2260            return self.expression(exp.Tuple, expressions=grouping_set)
2261
2262        return self._parse_column()
2263
2264    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2265        if not skip_having_token and not self._match(TokenType.HAVING):
2266            return None
2267        return self.expression(exp.Having, this=self._parse_conjunction())
2268
2269    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2270        if not self._match(TokenType.QUALIFY):
2271            return None
2272        return self.expression(exp.Qualify, this=self._parse_conjunction())
2273
2274    def _parse_order(
2275        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2276    ) -> t.Optional[exp.Expression]:
2277        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2278            return this
2279
2280        return self.expression(
2281            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2282        )
2283
2284    def _parse_sort(
2285        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2286    ) -> t.Optional[exp.Expression]:
2287        if not self._match(token_type):
2288            return None
2289        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2290
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item with its ASC/DESC and NULLS FIRST/LAST flags,
        applying the dialect's default null ordering when none is given."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; just consume it
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When the query doesn't order nulls explicitly, derive nulls_first
        # from the dialect's null_ordering setting and the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2312
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is set) or FETCH clause.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP (n) may be parenthesized, e.g. in T-SQL.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH [FIRST|NEXT] n [ROW|ROWS] [ONLY]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this
2336
2337    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2338        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2339            return this
2340
2341        count = self._parse_number()
2342        self._match_set((TokenType.ROW, TokenType.ROWS))
2343        return self.expression(exp.Offset, this=this, expression=count)
2344
2345    def _parse_lock(self) -> t.Optional[exp.Expression]:
2346        if self._match_text_seq("FOR", "UPDATE"):
2347            return self.expression(exp.Lock, update=True)
2348        if self._match_text_seq("FOR", "SHARE"):
2349            return self.expression(exp.Lock, update=False)
2350
2351        return None
2352
2353    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2354        if not self._match_set(self.SET_OPERATIONS):
2355            return this
2356
2357        token_type = self._prev.token_type
2358
2359        if token_type == TokenType.UNION:
2360            expression = exp.Union
2361        elif token_type == TokenType.EXCEPT:
2362            expression = exp.Except
2363        else:
2364            expression = exp.Intersect
2365
2366        return self.expression(
2367            expression,
2368            this=this,
2369            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2370            expression=self._parse_set_operations(self._parse_select(nested=True)),
2371        )
2372
2373    def _parse_expression(self) -> t.Optional[exp.Expression]:
2374        return self._parse_alias(self._parse_conjunction())
2375
2376    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2377        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2378
2379    def _parse_equality(self) -> t.Optional[exp.Expression]:
2380        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2381
2382    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2383        return self._parse_tokens(self._parse_range, self.COMPARISON)
2384
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)
        on top of a bitwise expression, handling a leading NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the NOT consumed earlier, wrapping whatever was parsed above.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2407
2408    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2409        negate = self._match(TokenType.NOT)
2410        if self._match(TokenType.DISTINCT_FROM):
2411            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2412            return self.expression(klass, this=this, expression=self._parse_expression())
2413
2414        this = self.expression(
2415            exp.Is,
2416            this=this,
2417            expression=self._parse_null() or self._parse_boolean(),
2418        )
2419        return self.expression(exp.Not, this=this) if negate else this
2420
2421    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2422        unnest = self._parse_unnest()
2423        if unnest:
2424            this = self.expression(exp.In, this=this, unnest=unnest)
2425        elif self._match(TokenType.L_PAREN):
2426            expressions = self._parse_csv(self._parse_select_or_expression)
2427
2428            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2429                this = self.expression(exp.In, this=this, query=expressions[0])
2430            else:
2431                this = self.expression(exp.In, this=this, expressions=expressions)
2432
2433            self._match_r_paren()
2434        else:
2435            this = self.expression(exp.In, this=this, field=self._parse_field())
2436
2437        return this
2438
2439    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2440        low = self._parse_bitwise()
2441        self._match(TokenType.AND)
2442        high = self._parse_bitwise()
2443        return self.expression(exp.Between, this=this, low=low, high=high)
2444
2445    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2446        if not self._match(TokenType.ESCAPE):
2447            return this
2448        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2449
2450    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2451        this = self._parse_term()
2452
2453        while True:
2454            if self._match_set(self.BITWISE):
2455                this = self.expression(
2456                    self.BITWISE[self._prev.token_type],
2457                    this=this,
2458                    expression=self._parse_term(),
2459                )
2460            elif self._match_pair(TokenType.LT, TokenType.LT):
2461                this = self.expression(
2462                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2463                )
2464            elif self._match_pair(TokenType.GT, TokenType.GT):
2465                this = self.expression(
2466                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2467                )
2468            else:
2469                break
2470
2471        return this
2472
2473    def _parse_term(self) -> t.Optional[exp.Expression]:
2474        return self._parse_tokens(self._parse_factor, self.TERM)
2475
2476    def _parse_factor(self) -> t.Optional[exp.Expression]:
2477        return self._parse_tokens(self._parse_unary, self.FACTOR)
2478
2479    def _parse_unary(self) -> t.Optional[exp.Expression]:
2480        if self._match_set(self.UNARY_PARSERS):
2481            return self.UNARY_PARSERS[self._prev.token_type](self)
2482        return self._parse_at_time_zone(self._parse_type())
2483
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a `<type> <literal>` cast-style construct, or a column."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # A type followed by an expression, e.g. DATE '2020-01-01', parses as a cast.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare unparameterized type with nothing after it - reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2501
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested or parameterized) data type into an exp.DataType.

        Args:
            check_func: when True, a parenthesized form that is not followed by a
                string literal is retreated and None is returned, so the caller
                can parse the tokens as a function call instead.
        """
        index = self._index

        # Teradata allows type names to be prefixed with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                # Plain type parameters, e.g. DECIMAL(10, 2)
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # A parenthesized argument list could also be a function call - remember that.
            maybe_func = True

        # Postgres-style array suffixes, e.g. INT[] or INT[][]
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone '[' (no matching ']') means this wasn't a type after all.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a: INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH / WITHOUT [LOCAL] TIME ZONE variants to concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call interpretation.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so treat this as a call, e.g. DATE(x).
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2610
2611    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2612        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2613            return self._parse_types()
2614
2615        this = self._parse_id_var()
2616        self._match(TokenType.COLON)
2617        data_type = self._parse_types()
2618
2619        if not data_type:
2620            return None
2621        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2622
2623    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2624        if not self._match(TokenType.AT_TIME_ZONE):
2625            return this
2626        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2627
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dot chains, :: casts and brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # A plain dot: the next part is a star, a function, or an identifier.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: the previous column name becomes the table,
                # the table becomes the db, and the db becomes the catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2676
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary: a registered literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # A number with no leading digit, e.g. .5
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Several comma-separated expressions form a tuple, e.g. (1, 2).
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve comments that preceded the opening parenthesis.
            if this and comments:
                this.comments = comments

            return this

        return None
2723
2724    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2725        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2726
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers where possible.

        Args:
            functions: optional name -> builder mapping; defaults to ``self.FUNCTIONS``.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following '(' only niladic functions (e.g. CURRENT_DATE) qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening '('

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) or ANY(WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown functions are kept as-is via exp.Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2788
2789    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
2790        return self._parse_column_def(self._parse_id_var())
2791
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and its optional parameter list.

        Args:
            kind: unused here; accepted so dialect-specific overrides share this signature.
        """
        this = self._parse_id_var()

        # Allow qualified names, e.g. db.schema.func
        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        # Without a parameter list, return just the (qualified) name.
        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
2808
2809    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2810        literal = self._parse_primary()
2811        if literal:
2812            return self.expression(exp.Introducer, this=token.text, expression=literal)
2813
2814        return self.expression(exp.Identifier, this=token.text)
2815
2816    def _parse_national(self, token: Token) -> exp.Expression:
2817        return self.expression(exp.National, this=exp.Literal.string(token.text))
2818
2819    def _parse_session_parameter(self) -> exp.Expression:
2820        kind = None
2821        this = self._parse_id_var() or self._parse_primary()
2822
2823        if this and self._match(TokenType.DOT):
2824            kind = this.name
2825            this = self._parse_var() or self._parse_primary()
2826
2827        return self.expression(exp.SessionParameter, this=this, kind=kind)
2828
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. ``(x, y) -> ...``); if the tokens aren't a lambda,
        retreat and parse a regular argument expression, with optional DISTINCT,
        IGNORE/RESPECT NULLS, ORDER BY and LIMIT modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - undo and reparse as a plain argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default, so it's consumed but not represented.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2860
2861    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2862        index = self._index
2863        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2864            self._retreat(index)
2865            return this
2866
2867        args = self._parse_csv(
2868            lambda: self._parse_constraint()
2869            or self._parse_column_def(self._parse_field(any_token=True))
2870        )
2871        self._match_r_paren()
2872        return self.expression(exp.Schema, this=this, expressions=args)
2873
2874    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2875        kind = self._parse_types()
2876
2877        if self._match_text_seq("FOR", "ORDINALITY"):
2878            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2879
2880        constraints = []
2881        while True:
2882            constraint = self._parse_column_constraint()
2883            if not constraint:
2884                break
2885            constraints.append(constraint)
2886
2887        if not kind and not constraints:
2888            return this
2889
2890        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2891
2892    def _parse_auto_increment(self) -> exp.Expression:
2893        start = None
2894        increment = None
2895
2896        if self._match(TokenType.L_PAREN, advance=False):
2897            args = self._parse_wrapped_csv(self._parse_bitwise)
2898            start = seq_get(args, 0)
2899            increment = seq_get(args, 1)
2900        elif self._match_text_seq("START"):
2901            start = self._parse_bitwise()
2902            self._match_text_seq("INCREMENT")
2903            increment = self._parse_bitwise()
2904
2905        if start and increment:
2906            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2907
2908        return exp.AutoIncrementColumnConstraint()
2909
2910    def _parse_compress(self) -> exp.Expression:
2911        if self._match(TokenType.L_PAREN, advance=False):
2912            return self.expression(
2913                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
2914            )
2915
2916        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
2917
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its options."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 1 MINVALUE 0 ... CYCLE)
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
2944
2945    def _parse_inline(self) -> t.Optional[exp.Expression]:
2946        self._match_text_seq("LENGTH")
2947        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
2948
2949    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
2950        if self._match_text_seq("NULL"):
2951            return self.expression(exp.NotNullColumnConstraint)
2952        if self._match_text_seq("CASESPECIFIC"):
2953            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
2954        return None
2955
2956    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
2957        this = self._parse_references()
2958        if this:
2959            return this
2960
2961        if self._match(TokenType.CONSTRAINT):
2962            this = self._parse_id_var()
2963
2964        if self._match_texts(self.CONSTRAINT_PARSERS):
2965            return self.expression(
2966                exp.ColumnConstraint,
2967                this=this,
2968                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
2969            )
2970
2971        return this
2972
2973    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2974        if not self._match(TokenType.CONSTRAINT):
2975            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
2976
2977        this = self._parse_id_var()
2978        expressions = []
2979
2980        while True:
2981            constraint = self._parse_unnamed_constraint() or self._parse_function()
2982            if not constraint:
2983                break
2984            expressions.append(constraint)
2985
2986        return self.expression(exp.Constraint, this=this, expressions=expressions)
2987
2988    def _parse_unnamed_constraint(
2989        self, constraints: t.Optional[t.Collection[str]] = None
2990    ) -> t.Optional[exp.Expression]:
2991        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
2992            return None
2993
2994        constraint = self._prev.text.upper()
2995        if constraint not in self.CONSTRAINT_PARSERS:
2996            self.raise_error(f"No parser found for schema constraint {constraint}.")
2997
2998        return self.CONSTRAINT_PARSERS[constraint](self)
2999
3000    def _parse_unique(self) -> exp.Expression:
3001        if not self._match(TokenType.L_PAREN, advance=False):
3002            return self.expression(exp.UniqueColumnConstraint)
3003        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3004
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings (e.g. "ON DELETE CASCADE")."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event, e.g. DELETE or UPDATE.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # An unrecognized token ends the option list.
                break

        return options
3041
3042    def _parse_references(self) -> t.Optional[exp.Expression]:
3043        if not self._match(TokenType.REFERENCES):
3044            return None
3045
3046        expressions = None
3047        this = self._parse_id_var()
3048
3049        if self._match(TokenType.L_PAREN, advance=False):
3050            expressions = self._parse_wrapped_id_vars()
3051
3052        options = self._parse_key_constraint_options()
3053        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3054
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY: columns, REFERENCES, and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # e.g. "delete" / "update" - used below as the ForeignKey keyword-arg name.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3080
3081    def _parse_primary_key(self) -> exp.Expression:
3082        desc = (
3083            self._match_set((TokenType.ASC, TokenType.DESC))
3084            and self._prev.token_type == TokenType.DESC
3085        )
3086
3087        if not self._match(TokenType.L_PAREN, advance=False):
3088            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3089
3090        expressions = self._parse_wrapped_id_vars()
3091        options = self._parse_key_constraint_options()
3092        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3093
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/arrays or {...} struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is an open-start slice, e.g. x[:5]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize the dialect's array index base using the parser's index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to support chained access, e.g. x[0][1]
        return self._parse_bracket(this)
3122
3123    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3124        if self._match(TokenType.COLON):
3125            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3126        return this
3127
3128    def _parse_case(self) -> t.Optional[exp.Expression]:
3129        ifs = []
3130        default = None
3131
3132        expression = self._parse_conjunction()
3133
3134        while self._match(TokenType.WHEN):
3135            this = self._parse_conjunction()
3136            self._match(TokenType.THEN)
3137            then = self._parse_conjunction()
3138            ifs.append(self.expression(exp.If, this=this, true=then))
3139
3140        if self._match(TokenType.ELSE):
3141            default = self._parse_conjunction()
3142
3143        if not self._match(TokenType.END):
3144            self.raise_error("Expected END after CASE", self._prev)
3145
3146        return self._parse_window(
3147            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3148        )
3149
3150    def _parse_if(self) -> t.Optional[exp.Expression]:
3151        if self._match(TokenType.L_PAREN):
3152            args = self._parse_csv(self._parse_conjunction)
3153            this = exp.If.from_arg_list(args)
3154            self.validate_expression(this, args)
3155            self._match_r_paren()
3156        else:
3157            condition = self._parse_conjunction()
3158            self._match(TokenType.THEN)
3159            true = self._parse_conjunction()
3160            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3161            self._match(TokenType.END)
3162            this = self.expression(exp.If, this=condition, true=true, false=false)
3163
3164        return self._parse_window(this)
3165
3166    def _parse_extract(self) -> exp.Expression:
3167        this = self._parse_function() or self._parse_var() or self._parse_type()
3168
3169        if self._match(TokenType.FROM):
3170            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3171
3172        if not self._match(TokenType.COMMA):
3173            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3174
3175        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3176
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type>).

        Args:
            strict: when True produce `exp.Cast`, otherwise `exp.TryCast`.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit character set: CAST(x AS CHAR CHARACTER SET cs).
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3192
    def _parse_string_agg(self) -> exp.Expression:
        """Parse the interior of STRING_AGG([DISTINCT] expr [, separator] ...).

        Produces an `exp.GroupConcat`, optionally wrapping the first argument
        in an ORDER BY when the dialect allows one inside the call or via
        WITHIN GROUP.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Remember the position so we can rewind if WITHIN GROUP is absent.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3221
3222    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3223        to: t.Optional[exp.Expression]
3224        this = self._parse_column()
3225
3226        if self._match(TokenType.USING):
3227            to = self.expression(exp.CharacterSet, this=self._parse_var())
3228        elif self._match(TokenType.COMMA):
3229            to = self._parse_types()
3230        else:
3231            to = None
3232
3233        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3234
    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style arguments into an `exp.StrPosition`.

        Handles both ``POSITION(substr IN str)`` and the comma-separated
        form. `haystack_first` controls the argument order of the latter
        (e.g. LOCATE-style vs. STRPOS-style dialects).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(substr IN haystack).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        # Optional third argument: the start position.
        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this
3255
3256    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3257        args = self._parse_csv(self._parse_table)
3258        return exp.JoinHint(this=func_name.upper(), expressions=args)
3259
    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            # Translate FROM <start> [FOR <length>] into positional args.
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this
3275
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] str [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        # Optional trim side keyword (one of self.TRIM_TYPES).
        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: the first term is the character set to trim.
            this = self._parse_term()
        else:
            # Single-argument form: the parsed term is the target string.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )
3303
3304    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3305        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3306
3307    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3308        return self._parse_window(self._parse_id_var(), alias=True)
3309
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse optional FILTER / WITHIN GROUP / IGNORE NULLS / OVER suffixes.

        Wraps `this` in the corresponding expression nodes as each suffix is
        matched. When `alias` is True, parses a named-window definition
        (``name AS (...)``) instead of requiring OVER.

        Returns `this` (possibly wrapped) if no OVER clause follows.
        """
        if self._match(TokenType.FILTER):
            # Aggregate FILTER (WHERE ...) clause.
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window-name>: reference to a named window, no inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        # Inline spec: optional base window name, PARTITION BY, ORDER BY, frame.
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3383
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary of a window frame clause.

        Returns:
            A dict with "value" (the UNBOUNDED/CURRENT ROW keyword text, or an
            expression such as the N in "N PRECEDING") and "side" (the
            PRECEDING/FOLLOWING keyword text if present, else a falsy value).
        """
        self._match(TokenType.BETWEEN)  # tolerate a stray BETWEEN here too

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }
3394
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) for `this`.

        Args:
            this: the expression being aliased.
            explicit: when True, only parse an alias if the AS keyword is present.

        Returns:
            An `exp.Alias` / `exp.Aliases` wrapper, or `this` unchanged if no
            alias follows.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: t AS (a, b, c).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
3418
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like variable.

        Args:
            any_token: allow any non-reserved token to act as the identifier.
            tokens: token types accepted as identifiers (defaults to ID_VAR_TOKENS).
            prefix_tokens: token types whose text is prepended to the identifier.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            # Accumulate the raw text of any leading prefix tokens.
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # String tokens used as identifiers are treated as quoted.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None
3441
3442    def _parse_string(self) -> t.Optional[exp.Expression]:
3443        if self._match(TokenType.STRING):
3444            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3445        return self._parse_placeholder()
3446
3447    def _parse_number(self) -> t.Optional[exp.Expression]:
3448        if self._match(TokenType.NUMBER):
3449            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3450        return self._parse_placeholder()
3451
3452    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3453        if self._match(TokenType.IDENTIFIER):
3454            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3455        return self._parse_placeholder()
3456
3457    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3458        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3459            return self.expression(exp.Var, this=self._prev.text)
3460        return self._parse_placeholder()
3461
3462    def _advance_any(self) -> t.Optional[Token]:
3463        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3464            self._advance()
3465            return self._prev
3466        return None
3467
3468    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3469        return self._parse_var() or self._parse_string()
3470
3471    def _parse_null(self) -> t.Optional[exp.Expression]:
3472        if self._match(TokenType.NULL):
3473            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3474        return None
3475
3476    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3477        if self._match(TokenType.TRUE):
3478            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3479        if self._match(TokenType.FALSE):
3480            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3481        return None
3482
3483    def _parse_star(self) -> t.Optional[exp.Expression]:
3484        if self._match(TokenType.STAR):
3485            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3486        return None
3487
3488    def _parse_parameter(self) -> exp.Expression:
3489        wrapped = self._match(TokenType.L_BRACE)
3490        this = self._parse_var() or self._parse_primary()
3491        self._match(TokenType.R_BRACE)
3492        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3493
3494    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3495        if self._match_set(self.PLACEHOLDER_PARSERS):
3496            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3497            if placeholder:
3498                return placeholder
3499            self._advance(-1)
3500        return None
3501
3502    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3503        if not self._match(TokenType.EXCEPT):
3504            return None
3505        if self._match(TokenType.L_PAREN, advance=False):
3506            return self._parse_wrapped_csv(self._parse_column)
3507        return self._parse_csv(self._parse_column)
3508
3509    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3510        if not self._match(TokenType.REPLACE):
3511            return None
3512        if self._match(TokenType.L_PAREN, advance=False):
3513            return self._parse_wrapped_csv(self._parse_expression)
3514        return self._parse_csv(self._parse_expression)
3515
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`.

        Items that parse to None are skipped; comments attached to a
        separator token are moved onto the preceding item.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments captured on the separator belong to the previous item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
3531
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators.

        Args:
            parse_method: parser for each operand.
            expressions: mapping of operator TokenType -> expression class.
        """
        this = parse_method()

        while self._match_set(expressions):
            # Fold each matched operator into a left-nested expression tree.
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
3546
3547    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3548        return self._parse_wrapped_csv(self._parse_id_var)
3549
3550    def _parse_wrapped_csv(
3551        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3552    ) -> t.List[t.Optional[exp.Expression]]:
3553        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3554
3555    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3556        self._match_l_paren()
3557        parse_result = parse_method()
3558        self._match_r_paren()
3559        return parse_result
3560
3561    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3562        return self._parse_select() or self._parse_expression()
3563
3564    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3565        return self._parse_set_operations(
3566            self._parse_select(nested=True, parse_subquery_alias=False)
3567        )
3568
    def _parse_transaction(self) -> exp.Expression:
        """Parse a BEGIN/START TRANSACTION statement with optional modes.

        The optional kind keyword comes from self.TRANSACTION_KIND; any
        following VAR tokens are collected into space-joined mode strings,
        comma-separated.
        """
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        # The TRANSACTION / WORK keyword itself is optional noise.
        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. multiple words.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
3588
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse the remainder of a COMMIT or ROLLBACK statement.

        The COMMIT/ROLLBACK keyword itself was already consumed by the caller
        (its type is read from self._prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        # ROLLBACK TO [SAVEPOINT] <name>
        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # COMMIT AND [NO] CHAIN
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): chain is parsed in both cases but only emitted on
        # Commit, and savepoint only on Rollback — confirm that is intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
3607
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse "ADD [COLUMN] [IF NOT EXISTS] <column def>" in ALTER TABLE.

        Returns None when the next token is not ADD.
        """
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            # Record the IF NOT EXISTS flag on the column definition.
            expression.set("exists", exists_column)

        return expression
3620
3621    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3622        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3623
3624    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3625    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3626        return self.expression(
3627            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3628        )
3629
3630    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3631        this = None
3632        kind = self._prev.token_type
3633
3634        if kind == TokenType.CONSTRAINT:
3635            this = self._parse_id_var()
3636
3637            if self._match_text_seq("CHECK"):
3638                expression = self._parse_wrapped(self._parse_conjunction)
3639                enforced = self._match_text_seq("ENFORCED")
3640
3641                return self.expression(
3642                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3643                )
3644
3645        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3646            expression = self._parse_foreign_key()
3647        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3648            expression = self._parse_primary_key()
3649
3650        return self.expression(exp.AddConstraint, this=this, expression=expression)
3651
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement into an `exp.AlterTable`.

        Non-TABLE ALTER statements are preserved verbatim as a raw Command.
        Supported actions: DELETE, ADD (columns or constraints), DROP
        (columns or partitions), RENAME TO, and ALTER COLUMN.
        """
        if not self._match(TokenType.TABLE):
            # e.g. ALTER VIEW / ALTER INDEX: keep the raw SQL as a Command.
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # Remember the position so ambiguous ADD/DROP branches can rewind.
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column re-consumes ADD.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition: rewind so _parse_drop_column re-consumes DROP.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3704
3705    def _parse_show(self) -> t.Optional[exp.Expression]:
3706        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3707        if parser:
3708            return parser(self)
3709        self._advance()
3710        return self.expression(exp.Show, this=self._prev.text.upper())
3711
3712    def _default_parse_set_item(self) -> exp.Expression:
3713        return self.expression(
3714            exp.SetItem,
3715            this=self._parse_statement(),
3716        )
3717
3718    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3719        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3720        return parser(self) if parser else self._default_parse_set_item()
3721
3722    def _parse_merge(self) -> exp.Expression:
3723        self._match(TokenType.INTO)
3724        target = self._parse_table()
3725
3726        self._match(TokenType.USING)
3727        using = self._parse_table()
3728
3729        self._match(TokenType.ON)
3730        on = self._parse_conjunction()
3731
3732        whens = []
3733        while self._match(TokenType.WHEN):
3734            this = self._parse_conjunction()
3735            self._match(TokenType.THEN)
3736
3737            if self._match(TokenType.INSERT):
3738                _this = self._parse_star()
3739                if _this:
3740                    then = self.expression(exp.Insert, this=_this)
3741                else:
3742                    then = self.expression(
3743                        exp.Insert,
3744                        this=self._parse_value(),
3745                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3746                    )
3747            elif self._match(TokenType.UPDATE):
3748                expressions = self._parse_star()
3749                if expressions:
3750                    then = self.expression(exp.Update, expressions=expressions)
3751                else:
3752                    then = self.expression(
3753                        exp.Update,
3754                        expressions=self._match(TokenType.SET)
3755                        and self._parse_csv(self._parse_equality),
3756                    )
3757            elif self._match(TokenType.DELETE):
3758                then = self.expression(exp.Var, this=self._prev.text)
3759
3760            whens.append(self.expression(exp.When, this=this, then=then))
3761
3762        return self.expression(
3763            exp.Merge,
3764            this=target,
3765            using=using,
3766            on=on,
3767            expressions=whens,
3768        )
3769
3770    def _parse_set(self) -> exp.Expression:
3771        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3772
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and preserve the statement verbatim.

        Args:
            start: the first token of the statement, used to slice the
                original SQL text for the Command node.
        """
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))
3777
3778    def _find_parser(
3779        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3780    ) -> t.Optional[t.Callable]:
3781        index = self._index
3782        this = []
3783        while True:
3784            # The current token might be multiple words
3785            curr = self._curr.text.upper()
3786            key = curr.split(" ")
3787            this.append(curr)
3788            self._advance()
3789            result, trie = in_trie(trie, key)
3790            if result == 0:
3791                break
3792            if result == 2:
3793                subparser = parsers[" ".join(this)]
3794                return subparser
3795        self._retreat(index)
3796        return None
3797
3798    def _match(self, token_type, advance=True):
3799        if not self._curr:
3800            return None
3801
3802        if self._curr.token_type == token_type:
3803            if advance:
3804                self._advance()
3805            return True
3806
3807        return None
3808
3809    def _match_set(self, types, advance=True):
3810        if not self._curr:
3811            return None
3812
3813        if self._curr.token_type in types:
3814            if advance:
3815                self._advance()
3816            return True
3817
3818        return None
3819
3820    def _match_pair(self, token_type_a, token_type_b, advance=True):
3821        if not self._curr or not self._next:
3822            return None
3823
3824        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3825            if advance:
3826                self._advance(2)
3827            return True
3828
3829        return None
3830
3831    def _match_l_paren(self, expression=None):
3832        if not self._match(TokenType.L_PAREN):
3833            self.raise_error("Expecting (")
3834        if expression and self._prev_comments:
3835            expression.comments = self._prev_comments
3836
3837    def _match_r_paren(self, expression=None):
3838        if not self._match(TokenType.R_PAREN):
3839            self.raise_error("Expecting )")
3840        if expression and self._prev_comments:
3841            expression.comments = self._prev_comments
3842
3843    def _match_texts(self, texts, advance=True):
3844        if self._curr and self._curr.text.upper() in texts:
3845            if advance:
3846                self._advance()
3847            return True
3848        return False
3849
3850    def _match_text_seq(self, *texts, advance=True):
3851        index = self._index
3852        for text in texts:
3853            if self._curr and self._curr.text.upper() == text:
3854                self._advance()
3855            else:
3856                self._retreat(index)
3857                return False
3858
3859        if not advance:
3860            self._retreat(index)
3861
3862        return True
3863
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where column syntax actually denotes dotted references
        (e.g. table.column read as a path) rather than real columns.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.Dot(column); a bare one a Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
3878
3879    def _replace_lambda(self, node, lambda_variables):
3880        if isinstance(node, exp.Column):
3881            if node.name in lambda_variables:
3882                return node.this
3883        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
713    def __init__(
714        self,
715        error_level: t.Optional[ErrorLevel] = None,
716        error_message_context: int = 100,
717        index_offset: int = 0,
718        unnest_column_only: bool = False,
719        alias_post_tablesample: bool = False,
720        max_errors: int = 3,
721        null_ordering: t.Optional[str] = None,
722    ):
723        self.error_level = error_level or ErrorLevel.IMMEDIATE
724        self.error_message_context = error_message_context
725        self.index_offset = index_offset
726        self.unnest_column_only = unnest_column_only
727        self.alias_post_tablesample = alias_post_tablesample
728        self.max_errors = max_errors
729        self.null_ordering = null_ordering
730        self.reset()
def reset(self):
732    def reset(self):
733        self.sql = ""
734        self.errors = []
735        self._tokens = []
736        self._index = 0
737        self._curr = None
738        self._next = None
739        self._prev = None
740        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
742    def parse(
743        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
744    ) -> t.List[t.Optional[exp.Expression]]:
745        """
746        Parses a list of tokens and returns a list of syntax trees, one tree
747        per parsed SQL statement.
748
749        Args:
750            raw_tokens: the list of tokens.
751            sql: the original SQL string, used to produce helpful debug messages.
752
753        Returns:
754            The list of syntax trees.
755        """
756        return self._parse(
757            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
758        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
760    def parse_into(
761        self,
762        expression_types: exp.IntoType,
763        raw_tokens: t.List[Token],
764        sql: t.Optional[str] = None,
765    ) -> t.List[t.Optional[exp.Expression]]:
766        """
767        Parses a list of tokens into a given Expression type. If a collection of Expression
768        types is given instead, this method will try to parse the token list into each one
769        of them, stopping at the first for which the parsing succeeds.
770
771        Args:
772            expression_types: the expression type(s) to try and parse the token list into.
773            raw_tokens: the list of tokens.
774            sql: the original SQL string, used to produce helpful debug messages.
775
776        Returns:
777            The target Expression.
778        """
779        errors = []
780        for expression_type in ensure_collection(expression_types):
781            parser = self.EXPRESSION_PARSERS.get(expression_type)
782            if not parser:
783                raise TypeError(f"No parser registered for {expression_type}")
784            try:
785                return self._parse(parser, raw_tokens, sql)
786            except ParseError as e:
787                e.errors[0]["into_expression"] = expression_type
788                errors.append(e)
789        raise ParseError(
790            f"Failed to parse into {expression_types}",
791            errors=merge_errors(errors),
792        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
828    def check_errors(self) -> None:
829        """
830        Logs or raises any found errors, depending on the chosen error level setting.
831        """
832        if self.error_level == ErrorLevel.WARN:
833            for error in self.errors:
834                logger.error(str(error))
835        elif self.error_level == ErrorLevel.RAISE and self.errors:
836            raise ParseError(
837                concat_messages(self.errors, self.max_errors),
838                errors=merge_errors(self.errors),
839            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
841    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
842        """
843        Appends an error in the list of recorded errors or raises it, depending on the chosen
844        error level setting.
845        """
846        token = token or self._curr or self._prev or Token.string("")
847        start = self._find_token(token)
848        end = start + len(token.text)
849        start_context = self.sql[max(start - self.error_message_context, 0) : start]
850        highlight = self.sql[start:end]
851        end_context = self.sql[end : end + self.error_message_context]
852
853        error = ParseError.new(
854            f"{message}. Line {token.line}, Col: {token.col}.\n"
855            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
856            description=message,
857            line=token.line,
858            col=token.col,
859            start_context=start_context,
860            highlight=highlight,
861            end_context=end_context,
862        )
863
864        if self.error_level == ErrorLevel.IMMEDIATE:
865            raise error
866
867        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[sqlglot.expressions.Expression], comments: Optional[List[str]] = None, **kwargs) -> sqlglot.expressions.Expression:
869    def expression(
870        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
871    ) -> exp.Expression:
872        """
873        Creates a new, validated Expression.
874
875        Args:
876            exp_class: the expression class to instantiate.
877            comments: an optional list of comments to attach to the expression.
878            kwargs: the arguments to set for the expression along with their respective values.
879
880        Returns:
881            The target expression.
882        """
883        instance = exp_class(**kwargs)
884        if self._prev_comments:
885            instance.comments = self._prev_comments
886            self._prev_comments = None
887        if comments:
888            instance.comments = comments
889        self.validate_expression(instance)
890        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
892    def validate_expression(
893        self, expression: exp.Expression, args: t.Optional[t.List] = None
894    ) -> None:
895        """
896        Validates an already instantiated expression, making sure that all its mandatory arguments
897        are set.
898
899        Args:
900            expression: the expression to validate.
901            args: an optional list of items that was used to instantiate the expression, if it's a Func.
902        """
903        if self.error_level == ErrorLevel.IGNORE:
904            return
905
906        for error_message in expression.error_messages(args):
907            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.