sqlglot.parser — module source listing

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import (
  10    apply_index_offset,
  11    count_params,
  12    ensure_collection,
  13    ensure_list,
  14    seq_get,
  15)
  16from sqlglot.tokens import Token, Tokenizer, TokenType
  17from sqlglot.trie import in_trie, new_trie
  18
  19logger = logging.getLogger("sqlglot")
  20
  21
  22def parse_var_map(args):
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34class _Parser(type):
  35    def __new__(cls, clsname, bases, attrs):
  36        klass = super().__new__(cls, clsname, bases, attrs)
  37        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  38        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  39        return klass
  40
  41
  42class Parser(metaclass=_Parser):
  43    """
  44    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  45    a parsed syntax tree.
  46
  47    Args:
  48        error_level: the desired error level.
  49            Default: ErrorLevel.RAISE
  50        error_message_context: determines the amount of context to capture from a
  51            query string when displaying the error message (in number of characters).
  52            Default: 50.
  53        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  54            Default: 0
  55        alias_post_tablesample: If the table alias comes after tablesample.
  56            Default: False
  57        max_errors: Maximum number of error messages to include in a raised ParseError.
  58            This is only relevant if error_level is ErrorLevel.RAISE.
  59            Default: 3
  60        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  61            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  62            Default: "nulls_are_small"
  63    """
  64
  65    FUNCTIONS: t.Dict[str, t.Callable] = {
  66        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  67        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  68            this=seq_get(args, 0),
  69            to=exp.DataType(this=exp.DataType.Type.TEXT),
  70        ),
  71        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  72            this=seq_get(args, 0),
  73            to=exp.DataType(this=exp.DataType.Type.TEXT),
  74        ),
  75        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  76            this=exp.Cast(
  77                this=seq_get(args, 0),
  78                to=exp.DataType(this=exp.DataType.Type.TEXT),
  79            ),
  80            start=exp.Literal.number(1),
  81            length=exp.Literal.number(10),
  82        ),
  83        "VAR_MAP": parse_var_map,
  84        "IFNULL": exp.Coalesce.from_arg_list,
  85    }
  86
  87    NO_PAREN_FUNCTIONS = {
  88        TokenType.CURRENT_DATE: exp.CurrentDate,
  89        TokenType.CURRENT_DATETIME: exp.CurrentDate,
  90        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
  91    }
  92
  93    NESTED_TYPE_TOKENS = {
  94        TokenType.ARRAY,
  95        TokenType.MAP,
  96        TokenType.STRUCT,
  97        TokenType.NULLABLE,
  98    }
  99
 100    TYPE_TOKENS = {
 101        TokenType.BOOLEAN,
 102        TokenType.TINYINT,
 103        TokenType.SMALLINT,
 104        TokenType.INT,
 105        TokenType.BIGINT,
 106        TokenType.FLOAT,
 107        TokenType.DOUBLE,
 108        TokenType.CHAR,
 109        TokenType.NCHAR,
 110        TokenType.VARCHAR,
 111        TokenType.NVARCHAR,
 112        TokenType.TEXT,
 113        TokenType.MEDIUMTEXT,
 114        TokenType.LONGTEXT,
 115        TokenType.MEDIUMBLOB,
 116        TokenType.LONGBLOB,
 117        TokenType.BINARY,
 118        TokenType.VARBINARY,
 119        TokenType.JSON,
 120        TokenType.JSONB,
 121        TokenType.INTERVAL,
 122        TokenType.TIME,
 123        TokenType.TIMESTAMP,
 124        TokenType.TIMESTAMPTZ,
 125        TokenType.TIMESTAMPLTZ,
 126        TokenType.DATETIME,
 127        TokenType.DATE,
 128        TokenType.DECIMAL,
 129        TokenType.UUID,
 130        TokenType.GEOGRAPHY,
 131        TokenType.GEOMETRY,
 132        TokenType.HLLSKETCH,
 133        TokenType.HSTORE,
 134        TokenType.PSEUDO_TYPE,
 135        TokenType.SUPER,
 136        TokenType.SERIAL,
 137        TokenType.SMALLSERIAL,
 138        TokenType.BIGSERIAL,
 139        TokenType.XML,
 140        TokenType.UNIQUEIDENTIFIER,
 141        TokenType.MONEY,
 142        TokenType.SMALLMONEY,
 143        TokenType.ROWVERSION,
 144        TokenType.IMAGE,
 145        TokenType.VARIANT,
 146        TokenType.OBJECT,
 147        *NESTED_TYPE_TOKENS,
 148    }
 149
 150    SUBQUERY_PREDICATES = {
 151        TokenType.ANY: exp.Any,
 152        TokenType.ALL: exp.All,
 153        TokenType.EXISTS: exp.Exists,
 154        TokenType.SOME: exp.Any,
 155    }
 156
 157    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 158
 159    ID_VAR_TOKENS = {
 160        TokenType.VAR,
 161        TokenType.ANTI,
 162        TokenType.APPLY,
 163        TokenType.AUTO_INCREMENT,
 164        TokenType.BEGIN,
 165        TokenType.BOTH,
 166        TokenType.BUCKET,
 167        TokenType.CACHE,
 168        TokenType.CASCADE,
 169        TokenType.COLLATE,
 170        TokenType.COLUMN,
 171        TokenType.COMMAND,
 172        TokenType.COMMIT,
 173        TokenType.COMPOUND,
 174        TokenType.CONSTRAINT,
 175        TokenType.CURRENT_TIME,
 176        TokenType.DEFAULT,
 177        TokenType.DELETE,
 178        TokenType.DESCRIBE,
 179        TokenType.DIV,
 180        TokenType.END,
 181        TokenType.EXECUTE,
 182        TokenType.ESCAPE,
 183        TokenType.FALSE,
 184        TokenType.FIRST,
 185        TokenType.FILTER,
 186        TokenType.FOLLOWING,
 187        TokenType.FORMAT,
 188        TokenType.FUNCTION,
 189        TokenType.IF,
 190        TokenType.INDEX,
 191        TokenType.ISNULL,
 192        TokenType.INTERVAL,
 193        TokenType.LAZY,
 194        TokenType.LEADING,
 195        TokenType.LEFT,
 196        TokenType.LOCAL,
 197        TokenType.MATERIALIZED,
 198        TokenType.MERGE,
 199        TokenType.NATURAL,
 200        TokenType.NEXT,
 201        TokenType.OFFSET,
 202        TokenType.ONLY,
 203        TokenType.OPTIONS,
 204        TokenType.ORDINALITY,
 205        TokenType.PERCENT,
 206        TokenType.PIVOT,
 207        TokenType.PRECEDING,
 208        TokenType.RANGE,
 209        TokenType.REFERENCES,
 210        TokenType.RIGHT,
 211        TokenType.ROW,
 212        TokenType.ROWS,
 213        TokenType.SCHEMA,
 214        TokenType.SEED,
 215        TokenType.SEMI,
 216        TokenType.SET,
 217        TokenType.SHOW,
 218        TokenType.SORTKEY,
 219        TokenType.TABLE,
 220        TokenType.TEMPORARY,
 221        TokenType.TOP,
 222        TokenType.TRAILING,
 223        TokenType.TRUE,
 224        TokenType.UNBOUNDED,
 225        TokenType.UNIQUE,
 226        TokenType.UNLOGGED,
 227        TokenType.UNPIVOT,
 228        TokenType.PROCEDURE,
 229        TokenType.VIEW,
 230        TokenType.VOLATILE,
 231        TokenType.WINDOW,
 232        *SUBQUERY_PREDICATES,
 233        *TYPE_TOKENS,
 234        *NO_PAREN_FUNCTIONS,
 235    }
 236
 237    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 238        TokenType.APPLY,
 239        TokenType.LEFT,
 240        TokenType.NATURAL,
 241        TokenType.OFFSET,
 242        TokenType.RIGHT,
 243        TokenType.WINDOW,
 244    }
 245
 246    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 247
 248    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 249
 250    FUNC_TOKENS = {
 251        TokenType.COMMAND,
 252        TokenType.CURRENT_DATE,
 253        TokenType.CURRENT_DATETIME,
 254        TokenType.CURRENT_TIMESTAMP,
 255        TokenType.CURRENT_TIME,
 256        TokenType.FILTER,
 257        TokenType.FIRST,
 258        TokenType.FORMAT,
 259        TokenType.IDENTIFIER,
 260        TokenType.INDEX,
 261        TokenType.ISNULL,
 262        TokenType.ILIKE,
 263        TokenType.LIKE,
 264        TokenType.MERGE,
 265        TokenType.OFFSET,
 266        TokenType.PRIMARY_KEY,
 267        TokenType.REPLACE,
 268        TokenType.ROW,
 269        TokenType.UNNEST,
 270        TokenType.VAR,
 271        TokenType.LEFT,
 272        TokenType.RIGHT,
 273        TokenType.DATE,
 274        TokenType.DATETIME,
 275        TokenType.TABLE,
 276        TokenType.TIMESTAMP,
 277        TokenType.TIMESTAMPTZ,
 278        TokenType.WINDOW,
 279        *TYPE_TOKENS,
 280        *SUBQUERY_PREDICATES,
 281    }
 282
 283    CONJUNCTION = {
 284        TokenType.AND: exp.And,
 285        TokenType.OR: exp.Or,
 286    }
 287
 288    EQUALITY = {
 289        TokenType.EQ: exp.EQ,
 290        TokenType.NEQ: exp.NEQ,
 291        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 292    }
 293
 294    COMPARISON = {
 295        TokenType.GT: exp.GT,
 296        TokenType.GTE: exp.GTE,
 297        TokenType.LT: exp.LT,
 298        TokenType.LTE: exp.LTE,
 299    }
 300
 301    BITWISE = {
 302        TokenType.AMP: exp.BitwiseAnd,
 303        TokenType.CARET: exp.BitwiseXor,
 304        TokenType.PIPE: exp.BitwiseOr,
 305        TokenType.DPIPE: exp.DPipe,
 306    }
 307
 308    TERM = {
 309        TokenType.DASH: exp.Sub,
 310        TokenType.PLUS: exp.Add,
 311        TokenType.MOD: exp.Mod,
 312        TokenType.COLLATE: exp.Collate,
 313    }
 314
 315    FACTOR = {
 316        TokenType.DIV: exp.IntDiv,
 317        TokenType.LR_ARROW: exp.Distance,
 318        TokenType.SLASH: exp.Div,
 319        TokenType.STAR: exp.Mul,
 320    }
 321
 322    TIMESTAMPS = {
 323        TokenType.TIME,
 324        TokenType.TIMESTAMP,
 325        TokenType.TIMESTAMPTZ,
 326        TokenType.TIMESTAMPLTZ,
 327    }
 328
 329    SET_OPERATIONS = {
 330        TokenType.UNION,
 331        TokenType.INTERSECT,
 332        TokenType.EXCEPT,
 333    }
 334
 335    JOIN_SIDES = {
 336        TokenType.LEFT,
 337        TokenType.RIGHT,
 338        TokenType.FULL,
 339    }
 340
 341    JOIN_KINDS = {
 342        TokenType.INNER,
 343        TokenType.OUTER,
 344        TokenType.CROSS,
 345        TokenType.SEMI,
 346        TokenType.ANTI,
 347    }
 348
 349    LAMBDAS = {
 350        TokenType.ARROW: lambda self, expressions: self.expression(
 351            exp.Lambda,
 352            this=self._parse_conjunction().transform(
 353                self._replace_lambda, {node.name for node in expressions}
 354            ),
 355            expressions=expressions,
 356        ),
 357        TokenType.FARROW: lambda self, expressions: self.expression(
 358            exp.Kwarg,
 359            this=exp.Var(this=expressions[0].name),
 360            expression=self._parse_conjunction(),
 361        ),
 362    }
 363
 364    COLUMN_OPERATORS = {
 365        TokenType.DOT: None,
 366        TokenType.DCOLON: lambda self, this, to: self.expression(
 367            exp.Cast,
 368            this=this,
 369            to=to,
 370        ),
 371        TokenType.ARROW: lambda self, this, path: self.expression(
 372            exp.JSONExtract,
 373            this=this,
 374            expression=path,
 375        ),
 376        TokenType.DARROW: lambda self, this, path: self.expression(
 377            exp.JSONExtractScalar,
 378            this=this,
 379            expression=path,
 380        ),
 381        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 382            exp.JSONBExtract,
 383            this=this,
 384            expression=path,
 385        ),
 386        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 387            exp.JSONBExtractScalar,
 388            this=this,
 389            expression=path,
 390        ),
 391        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 392            exp.JSONBContains,
 393            this=this,
 394            expression=key,
 395        ),
 396    }
 397
 398    EXPRESSION_PARSERS = {
 399        exp.Column: lambda self: self._parse_column(),
 400        exp.DataType: lambda self: self._parse_types(),
 401        exp.From: lambda self: self._parse_from(),
 402        exp.Group: lambda self: self._parse_group(),
 403        exp.Identifier: lambda self: self._parse_id_var(),
 404        exp.Lateral: lambda self: self._parse_lateral(),
 405        exp.Join: lambda self: self._parse_join(),
 406        exp.Order: lambda self: self._parse_order(),
 407        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 408        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 409        exp.Lambda: lambda self: self._parse_lambda(),
 410        exp.Limit: lambda self: self._parse_limit(),
 411        exp.Offset: lambda self: self._parse_offset(),
 412        exp.TableAlias: lambda self: self._parse_table_alias(),
 413        exp.Table: lambda self: self._parse_table(),
 414        exp.Condition: lambda self: self._parse_conjunction(),
 415        exp.Expression: lambda self: self._parse_statement(),
 416        exp.Properties: lambda self: self._parse_properties(),
 417        exp.Where: lambda self: self._parse_where(),
 418        exp.Ordered: lambda self: self._parse_ordered(),
 419        exp.Having: lambda self: self._parse_having(),
 420        exp.With: lambda self: self._parse_with(),
 421        exp.Window: lambda self: self._parse_named_window(),
 422        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 423    }
 424
 425    STATEMENT_PARSERS = {
 426        TokenType.ALTER: lambda self: self._parse_alter(),
 427        TokenType.BEGIN: lambda self: self._parse_transaction(),
 428        TokenType.CACHE: lambda self: self._parse_cache(),
 429        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 430        TokenType.CREATE: lambda self: self._parse_create(),
 431        TokenType.DELETE: lambda self: self._parse_delete(),
 432        TokenType.DESC: lambda self: self._parse_describe(),
 433        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 434        TokenType.DROP: lambda self: self._parse_drop(),
 435        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 436        TokenType.INSERT: lambda self: self._parse_insert(),
 437        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 438        TokenType.MERGE: lambda self: self._parse_merge(),
 439        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 440        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 441        TokenType.UPDATE: lambda self: self._parse_update(),
 442        TokenType.USE: lambda self: self.expression(
 443            exp.Use,
 444            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 445            and exp.Var(this=self._prev.text),
 446            this=self._parse_table(schema=False),
 447        ),
 448    }
 449
 450    UNARY_PARSERS = {
 451        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 452        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 453        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 454        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 455    }
 456
 457    PRIMARY_PARSERS = {
 458        TokenType.STRING: lambda self, token: self.expression(
 459            exp.Literal, this=token.text, is_string=True
 460        ),
 461        TokenType.NUMBER: lambda self, token: self.expression(
 462            exp.Literal, this=token.text, is_string=False
 463        ),
 464        TokenType.STAR: lambda self, _: self.expression(
 465            exp.Star,
 466            **{"except": self._parse_except(), "replace": self._parse_replace()},
 467        ),
 468        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 469        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 470        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 471        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 472        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 473        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 474        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 475        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 476        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 477    }
 478
 479    PLACEHOLDER_PARSERS = {
 480        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 481        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 482        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 483        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 484        else None,
 485    }
 486
 487    RANGE_PARSERS = {
 488        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 489        TokenType.GLOB: lambda self, this: self._parse_escape(
 490            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
 491        ),
 492        TokenType.IN: lambda self, this: self._parse_in(this),
 493        TokenType.IS: lambda self, this: self._parse_is(this),
 494        TokenType.LIKE: lambda self, this: self._parse_escape(
 495            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
 496        ),
 497        TokenType.ILIKE: lambda self, this: self._parse_escape(
 498            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
 499        ),
 500        TokenType.IRLIKE: lambda self, this: self.expression(
 501            exp.RegexpILike, this=this, expression=self._parse_bitwise()
 502        ),
 503        TokenType.RLIKE: lambda self, this: self.expression(
 504            exp.RegexpLike, this=this, expression=self._parse_bitwise()
 505        ),
 506        TokenType.SIMILAR_TO: lambda self, this: self.expression(
 507            exp.SimilarTo, this=this, expression=self._parse_bitwise()
 508        ),
 509    }
 510
 511    PROPERTY_PARSERS = {
 512        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 513        "CHARACTER SET": lambda self: self._parse_character_set(),
 514        "CLUSTER BY": lambda self: self.expression(
 515            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 516        ),
 517        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 518        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 519        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 520        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 521        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 522        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 523        "DISTKEY": lambda self: self._parse_distkey(),
 524        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 525        "SORTKEY": lambda self: self._parse_sortkey(),
 526        "LIKE": lambda self: self._parse_create_like(),
 527        "RETURNS": lambda self: self._parse_returns(),
 528        "ROW": lambda self: self._parse_row(),
 529        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 530        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 531        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 532        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 533        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 534        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 535        "DETERMINISTIC": lambda self: self.expression(
 536            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 537        ),
 538        "IMMUTABLE": lambda self: self.expression(
 539            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
 540        ),
 541        "STABLE": lambda self: self.expression(
 542            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
 543        ),
 544        "VOLATILE": lambda self: self.expression(
 545            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
 546        ),
 547        "WITH": lambda self: self._parse_with_property(),
 548        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 549        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 550        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 551        "BEFORE": lambda self: self._parse_journal(
 552            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 553        ),
 554        "JOURNAL": lambda self: self._parse_journal(
 555            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 556        ),
 557        "AFTER": lambda self: self._parse_afterjournal(
 558            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 559        ),
 560        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 561        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 562        "CHECKSUM": lambda self: self._parse_checksum(),
 563        "FREESPACE": lambda self: self._parse_freespace(),
 564        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 565            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 566        ),
 567        "MIN": lambda self: self._parse_datablocksize(),
 568        "MINIMUM": lambda self: self._parse_datablocksize(),
 569        "MAX": lambda self: self._parse_datablocksize(),
 570        "MAXIMUM": lambda self: self._parse_datablocksize(),
 571        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 572            default=self._prev.text.upper() == "DEFAULT"
 573        ),
 574        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 575        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 576        "DEFINER": lambda self: self._parse_definer(),
 577        "LOCK": lambda self: self._parse_locking(),
 578        "LOCKING": lambda self: self._parse_locking(),
 579    }
 580
 581    CONSTRAINT_PARSERS = {
 582        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 583        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 584        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 585        "CHARACTER SET": lambda self: self.expression(
 586            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 587        ),
 588        "CHECK": lambda self: self.expression(
 589            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 590        ),
 591        "COLLATE": lambda self: self.expression(
 592            exp.CollateColumnConstraint, this=self._parse_var()
 593        ),
 594        "COMMENT": lambda self: self.expression(
 595            exp.CommentColumnConstraint, this=self._parse_string()
 596        ),
 597        "DEFAULT": lambda self: self.expression(
 598            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 599        ),
 600        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 601        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 602        "FORMAT": lambda self: self.expression(
 603            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 604        ),
 605        "GENERATED": lambda self: self._parse_generated_as_identity(),
 606        "IDENTITY": lambda self: self._parse_auto_increment(),
 607        "LIKE": lambda self: self._parse_create_like(),
 608        "NOT": lambda self: self._parse_not_constraint(),
 609        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 610        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 611        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 612        "TITLE": lambda self: self.expression(
 613            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 614        ),
 615        "UNIQUE": lambda self: self._parse_unique(),
 616        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 617    }
 618
 619    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 620
 621    NO_PAREN_FUNCTION_PARSERS = {
 622        TokenType.CASE: lambda self: self._parse_case(),
 623        TokenType.IF: lambda self: self._parse_if(),
 624        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 625    }
 626
 627    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 628        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 629        "TRY_CONVERT": lambda self: self._parse_convert(False),
 630        "EXTRACT": lambda self: self._parse_extract(),
 631        "POSITION": lambda self: self._parse_position(),
 632        "SUBSTRING": lambda self: self._parse_substring(),
 633        "TRIM": lambda self: self._parse_trim(),
 634        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 635        "TRY_CAST": lambda self: self._parse_cast(False),
 636        "STRING_AGG": lambda self: self._parse_string_agg(),
 637    }
 638
 639    QUERY_MODIFIER_PARSERS = {
 640        "match": lambda self: self._parse_match_recognize(),
 641        "where": lambda self: self._parse_where(),
 642        "group": lambda self: self._parse_group(),
 643        "having": lambda self: self._parse_having(),
 644        "qualify": lambda self: self._parse_qualify(),
 645        "windows": lambda self: self._parse_window_clause(),
 646        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
 647        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 648        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 649        "order": lambda self: self._parse_order(),
 650        "limit": lambda self: self._parse_limit(),
 651        "offset": lambda self: self._parse_offset(),
 652        "lock": lambda self: self._parse_lock(),
 653    }
 654
 655    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 656    SET_PARSERS: t.Dict[str, t.Callable] = {}
 657
 658    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 659
 660    CREATABLES = {
 661        TokenType.COLUMN,
 662        TokenType.FUNCTION,
 663        TokenType.INDEX,
 664        TokenType.PROCEDURE,
 665        TokenType.SCHEMA,
 666        TokenType.TABLE,
 667        TokenType.VIEW,
 668    }
 669
 670    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 671
 672    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 673
 674    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 675
 676    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 677
 678    STRICT_CAST = True
 679
 680    __slots__ = (
 681        "error_level",
 682        "error_message_context",
 683        "sql",
 684        "errors",
 685        "index_offset",
 686        "unnest_column_only",
 687        "alias_post_tablesample",
 688        "max_errors",
 689        "null_ordering",
 690        "_tokens",
 691        "_index",
 692        "_curr",
 693        "_next",
 694        "_prev",
 695        "_prev_comments",
 696        "_show_trie",
 697        "_set_trie",
 698    )
 699
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store parser configuration and initialize the mutable parse state.

        NOTE(review): the class docstring advertises ErrorLevel.RAISE as the
        default and a 50-character error context, but the effective defaults
        here are ErrorLevel.IMMEDIATE and 100 — confirm which is intended.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Reset all per-parse state (_tokens, _index, errors, ...) to a clean slate.
        self.reset()
 718
 719    def reset(self):
 720        self.sql = ""
 721        self.errors = []
 722        self._tokens = []
 723        self._index = 0
 724        self._curr = None
 725        self._next = None
 726        self._prev = None
 727        self._prev_comments = None
 728
 729    def parse(
 730        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 731    ) -> t.List[t.Optional[exp.Expression]]:
 732        """
 733        Parses a list of tokens and returns a list of syntax trees, one tree
 734        per parsed SQL statement.
 735
 736        Args:
 737            raw_tokens: the list of tokens.
 738            sql: the original SQL string, used to produce helpful debug messages.
 739
 740        Returns:
 741            The list of syntax trees.
 742        """
 743        return self._parse(
 744            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 745        )
 746
 747    def parse_into(
 748        self,
 749        expression_types: exp.IntoType,
 750        raw_tokens: t.List[Token],
 751        sql: t.Optional[str] = None,
 752    ) -> t.List[t.Optional[exp.Expression]]:
 753        """
 754        Parses a list of tokens into a given Expression type. If a collection of Expression
 755        types is given instead, this method will try to parse the token list into each one
 756        of them, stopping at the first for which the parsing succeeds.
 757
 758        Args:
 759            expression_types: the expression type(s) to try and parse the token list into.
 760            raw_tokens: the list of tokens.
 761            sql: the original SQL string, used to produce helpful debug messages.
 762
 763        Returns:
 764            The target Expression.
 765        """
 766        errors = []
 767        for expression_type in ensure_collection(expression_types):
 768            parser = self.EXPRESSION_PARSERS.get(expression_type)
 769            if not parser:
 770                raise TypeError(f"No parser registered for {expression_type}")
 771            try:
 772                return self._parse(parser, raw_tokens, sql)
 773            except ParseError as e:
 774                e.errors[0]["into_expression"] = expression_type
 775                errors.append(e)
 776        raise ParseError(
 777            f"Failed to parse into {expression_types}",
 778            errors=merge_errors(errors),
 779        ) from errors[-1]
 780
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits `raw_tokens` on semicolons into per-statement chunks and parses each
        chunk with `parse_method`, returning one (possibly None) expression per chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the token stream into one chunk per statement; a trailing
        # semicolon does not open a new (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position the cursor on the chunk's first token

            expressions.append(parse_method(self))

            # Leftover tokens mean the parser stopped before consuming everything.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 814
 815    def check_errors(self) -> None:
 816        """
 817        Logs or raises any found errors, depending on the chosen error level setting.
 818        """
 819        if self.error_level == ErrorLevel.WARN:
 820            for error in self.errors:
 821                logger.error(str(error))
 822        elif self.error_level == ErrorLevel.RAISE and self.errors:
 823            raise ParseError(
 824                concat_messages(self.errors, self.max_errors),
 825                errors=merge_errors(self.errors),
 826            )
 827
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token to anchor the error at; defaults to the current or
                previous token, or an empty-string token if neither is available.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The offending span is underlined with ANSI escape sequences.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 855
 856    def expression(
 857        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 858    ) -> exp.Expression:
 859        """
 860        Creates a new, validated Expression.
 861
 862        Args:
 863            exp_class: the expression class to instantiate.
 864            comments: an optional list of comments to attach to the expression.
 865            kwargs: the arguments to set for the expression along with their respective values.
 866
 867        Returns:
 868            The target expression.
 869        """
 870        instance = exp_class(**kwargs)
 871        if self._prev_comments:
 872            instance.comments = self._prev_comments
 873            self._prev_comments = None
 874        if comments:
 875            instance.comments = comments
 876        self.validate_expression(instance)
 877        return instance
 878
 879    def validate_expression(
 880        self, expression: exp.Expression, args: t.Optional[t.List] = None
 881    ) -> None:
 882        """
 883        Validates an already instantiated expression, making sure that all its mandatory arguments
 884        are set.
 885
 886        Args:
 887            expression: the expression to validate.
 888            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 889        """
 890        if self.error_level == ErrorLevel.IGNORE:
 891            return
 892
 893        for error_message in expression.error_messages(args):
 894            self.raise_error(error_message)
 895
 896    def _find_sql(self, start: Token, end: Token) -> str:
 897        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 898
 899    def _find_token(self, token: Token) -> int:
 900        line = 1
 901        col = 1
 902        index = 0
 903
 904        while line < token.line or col < token.col:
 905            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 906                line += 1
 907                col = 1
 908            else:
 909                col += 1
 910            index += 1
 911
 912        return index
 913
 914    def _advance(self, times: int = 1) -> None:
 915        self._index += times
 916        self._curr = seq_get(self._tokens, self._index)
 917        self._next = seq_get(self._tokens, self._index + 1)
 918        if self._index > 0:
 919            self._prev = self._tokens[self._index - 1]
 920            self._prev_comments = self._prev.comments
 921        else:
 922            self._prev = None
 923            self._prev_comments = None
 924
 925    def _retreat(self, index: int) -> None:
 926        self._advance(index - self._index)
 927
 928    def _parse_command(self) -> exp.Expression:
 929        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 930
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """
        Parses one statement: a registered statement type, a raw command, or a
        plain expression / SELECT with optional set operations and modifiers.
        """
        if self._curr is None:
            return None

        # Dispatch on the leading token when a statement parser is registered for it.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens the tokenizer flags as commands are kept as opaque strings.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression
 946
    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """
        Parses a DROP statement. Falls back to a raw Command when the dropped
        object kind is not a known creatable and no `default_kind` is given.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        # NOTE: keyword-argument order matters below -- each value consumes tokens.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )
 967
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Matches an `IF [NOT] EXISTS` clause; when `not_` is set, NOT is required.
        Returns a truthy value only if the full clause matched. The falsy result
        may be None rather than False because of the short-circuiting `and` chain.
        """
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
 974
 975    def _parse_create(self) -> t.Optional[exp.Expression]:
 976        start = self._prev
 977        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
 978            TokenType.OR, TokenType.REPLACE
 979        )
 980        set_ = self._match(TokenType.SET)  # Teradata
 981        multiset = self._match_text_seq("MULTISET")  # Teradata
 982        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
 983        volatile = self._match(TokenType.VOLATILE)  # Teradata
 984        temporary = self._match(TokenType.TEMPORARY)
 985        transient = self._match_text_seq("TRANSIENT")
 986        external = self._match_text_seq("EXTERNAL")
 987        unique = self._match(TokenType.UNIQUE)
 988        materialized = self._match(TokenType.MATERIALIZED)
 989
 990        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
 991            self._match(TokenType.TABLE)
 992
 993        properties = None
 994        create_token = self._match_set(self.CREATABLES) and self._prev
 995
 996        if not create_token:
 997            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
 998            create_token = self._match_set(self.CREATABLES) and self._prev
 999
1000            if not properties or not create_token:
1001                return self._parse_as_command(start)
1002
1003        exists = self._parse_exists(not_=True)
1004        this = None
1005        expression = None
1006        data = None
1007        statistics = None
1008        no_primary_index = None
1009        indexes = None
1010        no_schema_binding = None
1011        begin = None
1012
1013        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1014            this = self._parse_user_defined_function(kind=create_token.token_type)
1015            properties = self._parse_properties()
1016
1017            self._match(TokenType.ALIAS)
1018            begin = self._match(TokenType.BEGIN)
1019            return_ = self._match_text_seq("RETURN")
1020            expression = self._parse_statement()
1021
1022            if return_:
1023                expression = self.expression(exp.Return, this=expression)
1024        elif create_token.token_type == TokenType.INDEX:
1025            this = self._parse_index()
1026        elif create_token.token_type in (
1027            TokenType.TABLE,
1028            TokenType.VIEW,
1029            TokenType.SCHEMA,
1030        ):
1031            table_parts = self._parse_table_parts(schema=True)
1032
1033            # exp.Properties.Location.POST_NAME
1034            if self._match(TokenType.COMMA):
1035                temp_properties = self._parse_properties(before=True)
1036                if properties and temp_properties:
1037                    properties.expressions.append(temp_properties.expressions)
1038                elif temp_properties:
1039                    properties = temp_properties
1040
1041            this = self._parse_schema(this=table_parts)
1042
1043            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1044            temp_properties = self._parse_properties()
1045            if properties and temp_properties:
1046                properties.expressions.append(temp_properties.expressions)
1047            elif temp_properties:
1048                properties = temp_properties
1049
1050            self._match(TokenType.ALIAS)
1051
1052            # exp.Properties.Location.POST_ALIAS
1053            if not (
1054                self._match(TokenType.SELECT, advance=False)
1055                or self._match(TokenType.WITH, advance=False)
1056                or self._match(TokenType.L_PAREN, advance=False)
1057            ):
1058                temp_properties = self._parse_properties()
1059                if properties and temp_properties:
1060                    properties.expressions.append(temp_properties.expressions)
1061                elif temp_properties:
1062                    properties = temp_properties
1063
1064            expression = self._parse_ddl_select()
1065
1066            if create_token.token_type == TokenType.TABLE:
1067                if self._match_text_seq("WITH", "DATA"):
1068                    data = True
1069                elif self._match_text_seq("WITH", "NO", "DATA"):
1070                    data = False
1071
1072                if self._match_text_seq("AND", "STATISTICS"):
1073                    statistics = True
1074                elif self._match_text_seq("AND", "NO", "STATISTICS"):
1075                    statistics = False
1076
1077                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")
1078
1079                indexes = []
1080                while True:
1081                    index = self._parse_create_table_index()
1082
1083                    # exp.Properties.Location.POST_INDEX
1084                    if self._match(TokenType.PARTITION_BY, advance=False):
1085                        temp_properties = self._parse_properties()
1086                        if properties and temp_properties:
1087                            properties.expressions.append(temp_properties.expressions)
1088                        elif temp_properties:
1089                            properties = temp_properties
1090
1091                    if not index:
1092                        break
1093                    else:
1094                        indexes.append(index)
1095            elif create_token.token_type == TokenType.VIEW:
1096                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1097                    no_schema_binding = True
1098
1099        return self.expression(
1100            exp.Create,
1101            this=this,
1102            kind=create_token.text,
1103            expression=expression,
1104            set=set_,
1105            multiset=multiset,
1106            global_temporary=global_temporary,
1107            volatile=volatile,
1108            exists=exists,
1109            properties=properties,
1110            temporary=temporary,
1111            transient=transient,
1112            external=external,
1113            replace=replace,
1114            unique=unique,
1115            materialized=materialized,
1116            data=data,
1117            statistics=statistics,
1118            no_primary_index=no_primary_index,
1119            indexes=indexes,
1120            no_schema_binding=no_schema_binding,
1121            begin=begin,
1122        )
1123
1124    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1125        self._match(TokenType.COMMA)
1126
1127        # parsers look to _prev for no/dual/default, so need to consume first
1128        self._match_text_seq("NO")
1129        self._match_text_seq("DUAL")
1130        self._match_text_seq("DEFAULT")
1131
1132        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1133            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1134
1135        return None
1136
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """
        Parses a single property, trying the registered keyword parsers first,
        then a handful of multi-token forms, then a generic `key = value`
        assignment. Returns None when nothing matches.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Look ahead (advance=False) for `key = value` without consuming tokens.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1160
1161    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1162        self._match(TokenType.EQ)
1163        self._match(TokenType.ALIAS)
1164        return self.expression(
1165            exp_class,
1166            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1167        )
1168
1169    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1170        properties = []
1171
1172        while True:
1173            if before:
1174                identified_property = self._parse_property_before()
1175            else:
1176                identified_property = self._parse_property()
1177
1178            if not identified_property:
1179                break
1180            for p in ensure_collection(identified_property):
1181                properties.append(p)
1182
1183        if properties:
1184            return self.expression(exp.Properties, expressions=properties)
1185
1186        return None
1187
1188    def _parse_fallback(self, no=False) -> exp.Expression:
1189        self._match_text_seq("FALLBACK")
1190        return self.expression(
1191            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1192        )
1193
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """
        Parses a WITH property: a parenthesized property list, a WITH JOURNAL
        TABLE property, or a WITH ISOLATED LOADING property.
        """
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()
1207
1208    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parses a DEFINER = user@host property, returning None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host is an identifier, or a bare `%` wildcard (MOD token).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
1220
1221    def _parse_withjournaltable(self) -> exp.Expression:
1222        self._match_text_seq("WITH", "JOURNAL", "TABLE")
1223        self._match(TokenType.EQ)
1224        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1225
    def _parse_log(self, no=False) -> exp.Expression:
        """Parses a [NO] LOG property; `no` reflects a NO already consumed by the caller."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1229
1230    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1231        before = self._match_text_seq("BEFORE")
1232        self._match_text_seq("JOURNAL")
1233        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1234
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parses a [NOT] [LOCAL] AFTER JOURNAL property; flags come from the caller."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1240
1241    def _parse_checksum(self) -> exp.Expression:
1242        self._match_text_seq("CHECKSUM")
1243        self._match(TokenType.EQ)
1244
1245        on = None
1246        if self._match(TokenType.ON):
1247            on = True
1248        elif self._match_text_seq("OFF"):
1249            on = False
1250        default = self._match(TokenType.DEFAULT)
1251
1252        return self.expression(
1253            exp.ChecksumProperty,
1254            on=on,
1255            default=default,
1256        )
1257
1258    def _parse_freespace(self) -> exp.Expression:
1259        self._match_text_seq("FREESPACE")
1260        self._match(TokenType.EQ)
1261        return self.expression(
1262            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1263        )
1264
1265    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1266        self._match_text_seq("MERGEBLOCKRATIO")
1267        if self._match(TokenType.EQ):
1268            return self.expression(
1269                exp.MergeBlockRatioProperty,
1270                this=self._parse_number(),
1271                percent=self._match(TokenType.PERCENT),
1272            )
1273        else:
1274            return self.expression(
1275                exp.MergeBlockRatioProperty,
1276                no=no,
1277                default=default,
1278            )
1279
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """
        Parses a DATABLOCKSIZE property: the DEFAULT / MINIMUM / MAXIMUM variants,
        or an explicit `= <size>` with optional units.
        """
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1298
    def _parse_blockcompression(self) -> exp.Expression:
        """
        Parses a BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT property with an
        optional AUTOTEMP schema. The keyword checks are tried in order; at most
        one is expected to match.
        """
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
1318
    def _parse_withisolatedloading(self) -> exp.Expression:
        """
        Parses a WITH [NO] [CONCURRENT] ISOLATED LOADING
        [FOR ALL | FOR INSERT | FOR NONE] property.
        """
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # The FOR variants are tried in order; at most one is expected to match.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1335
    def _parse_locking(self) -> exp.Expression:
        """
        Parses a LOCKING clause: the locked object kind (TABLE/VIEW/ROW/DATABASE)
        and name, a FOR/IN keyword, the lock type, and an optional OVERRIDE.
        Unrecognized parts are left as None rather than raising.
        """
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no object name to parse.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1385
1386    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1387        if self._match(TokenType.PARTITION_BY):
1388            return self._parse_csv(self._parse_conjunction)
1389        return []
1390
1391    def _parse_partitioned_by(self) -> exp.Expression:
1392        self._match(TokenType.EQ)
1393        return self.expression(
1394            exp.PartitionedByProperty,
1395            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1396        )
1397
1398    def _parse_distkey(self) -> exp.Expression:
1399        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1400
    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """
        Parses a `LIKE <table> [INCLUDING|EXCLUDING <option> ...]` clause.
        Returns None when an INCLUDING/EXCLUDING keyword is not followed by an
        identifier, abandoning the whole clause.
        """
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)
1419
1420    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1421        return self.expression(
1422            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1423        )
1424
1425    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1426        self._match(TokenType.EQ)
1427        return self.expression(
1428            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1429        )
1430
    def _parse_returns(self) -> exp.Expression:
        """
        Parses a RETURNS clause of a function definition: either a scalar type,
        or a TABLE shape written as TABLE<...> or as a parenthesized schema.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> -- angle-bracketed struct-style columns.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1450
1451    def _parse_describe(self) -> exp.Expression:
1452        kind = self._match_set(self.CREATABLES) and self._prev.text
1453        this = self._parse_table()
1454
1455        return self.expression(exp.Describe, this=this, kind=kind)
1456
    def _parse_insert(self) -> exp.Expression:
        """
        Parses an INSERT statement, including the INSERT OVERWRITE [LOCAL]
        DIRECTORY and INSERT OR <alternative> variants.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters below -- each value consumes tokens.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )
1488
1489    def _parse_row(self) -> t.Optional[exp.Expression]:
1490        if not self._match(TokenType.FORMAT):
1491            return None
1492        return self._parse_row_format()
1493
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """
        Parses a ROW FORMAT clause: SERDE followed by a string, or DELIMITED with
        its optional TERMINATED BY / DEFINED AS sub-clauses.

        Args:
            match_row: when True, a leading ROW FORMAT token pair is required
                (and consumed); None is returned if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1519
    def _parse_load_data(self) -> exp.Expression:
        """
        Parses a LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE
        statement with optional INPUTFORMAT/SERDE trailers.
        """
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # NOTE: keyword-argument order matters below -- each value consumes tokens.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1537
1538    def _parse_delete(self) -> exp.Expression:
1539        self._match(TokenType.FROM)
1540
1541        return self.expression(
1542            exp.Delete,
1543            this=self._parse_table(schema=True),
1544            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1545            where=self._parse_where(),
1546        )
1547
1548    def _parse_update(self) -> exp.Expression:
1549        return self.expression(
1550            exp.Update,
1551            **{  # type: ignore
1552                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1553                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1554                "from": self._parse_from(),
1555                "where": self._parse_where(),
1556            },
1557        )
1558
1559    def _parse_uncache(self) -> exp.Expression:
1560        if not self._match(TokenType.TABLE):
1561            self.raise_error("Expecting TABLE after UNCACHE")
1562
1563        return self.expression(
1564            exp.Uncache,
1565            exists=self._parse_exists(),
1566            this=self._parse_table(schema=True),
1567        )
1568
    def _parse_cache(self) -> exp.Expression:
        """
        Parses a CACHE [LAZY] TABLE <table> [OPTIONS('key' = 'value')]
        [AS <select>] statement.
        """
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        # A single 'key' = 'value' pair is supported inside OPTIONS(...).
        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
1591
1592    def _parse_partition(self) -> t.Optional[exp.Expression]:
1593        if not self._match(TokenType.PARTITION):
1594            return None
1595
1596        return self.expression(
1597            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1598        )
1599
1600    def _parse_value(self) -> exp.Expression:
1601        if self._match(TokenType.L_PAREN):
1602            expressions = self._parse_csv(self._parse_conjunction)
1603            self._match_r_paren()
1604            return self.expression(exp.Tuple, expressions=expressions)
1605
1606        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1607        # Source: https://prestodb.io/docs/current/sql/values.html
1608        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1609
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a WITH clause plus statement, a SELECT,
        a parenthesized nested select/table, or a VALUES list.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table inside parentheses.
            parse_subquery_alias: parse a trailing alias on a parenthesized subquery.

        Returns:
            The parsed expression, or None when none of the forms match.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the CTE to the statement if it can carry one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1683
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause (a list of CTEs); returns None when WITH is absent.

        Args:
            skip_with_token: assume the WITH token was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a repeated WITH keyword is
            # also tolerated; note the else-branch may consume an additional
            # WITH that follows a comma.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1700
1701    def _parse_cte(self) -> exp.Expression:
1702        alias = self._parse_table_alias()
1703        if not alias or not alias.this:
1704            self.raise_error("Expected CTE to have alias")
1705
1706        self._match(TokenType.ALIAS)
1707
1708        return self.expression(
1709            exp.CTE,
1710            this=self._parse_wrapped(self._parse_statement),
1711            alias=alias,
1712        )
1713
1714    def _parse_table_alias(
1715        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1716    ) -> t.Optional[exp.Expression]:
1717        any_token = self._match(TokenType.ALIAS)
1718        alias = self._parse_id_var(
1719            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1720        )
1721        index = self._index
1722
1723        if self._match(TokenType.L_PAREN):
1724            columns = self._parse_csv(self._parse_function_parameter)
1725            self._match_r_paren() if columns else self._retreat(index)
1726        else:
1727            columns = None
1728
1729        if not alias and not columns:
1730            return None
1731
1732        return self.expression(exp.TableAlias, this=alias, columns=columns)
1733
1734    def _parse_subquery(
1735        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1736    ) -> exp.Expression:
1737        return self.expression(
1738            exp.Subquery,
1739            this=this,
1740            pivots=self._parse_pivots(),
1741            alias=self._parse_table_alias() if parse_alias else None,
1742        )
1743
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, comma-joined tables and
        the clauses in QUERY_MODIFIER_PARSERS) to `this`, mutating it in place.

        No-op unless `this` is one of the MODIFIABLES expression types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Comma cross-joins only apply to selects, not to bare tables.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # A comma appends another table to the existing FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        # Each clause parser (WHERE, GROUP BY, ...) returns a falsy value when
        # its clause is absent, in which case nothing is set.
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1768
1769    def _parse_hint(self) -> t.Optional[exp.Expression]:
1770        if self._match(TokenType.HINT):
1771            hints = self._parse_csv(self._parse_function)
1772            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1773                self.raise_error("Expected */ after HINT")
1774            return self.expression(exp.Hint, expressions=hints)
1775
1776        return None
1777
1778    def _parse_into(self) -> t.Optional[exp.Expression]:
1779        if not self._match(TokenType.INTO):
1780            return None
1781
1782        temp = self._match(TokenType.TEMPORARY)
1783        unlogged = self._match(TokenType.UNLOGGED)
1784        self._match(TokenType.TABLE)
1785
1786        return self.expression(
1787            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1788        )
1789
1790    def _parse_from(self) -> t.Optional[exp.Expression]:
1791        if not self._match(TokenType.FROM):
1792            return None
1793
1794        return self.expression(
1795            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1796        )
1797
1798    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1799        if not self._match(TokenType.MATCH_RECOGNIZE):
1800            return None
1801        self._match_l_paren()
1802
1803        partition = self._parse_partition_by()
1804        order = self._parse_order()
1805        measures = (
1806            self._parse_alias(self._parse_conjunction())
1807            if self._match_text_seq("MEASURES")
1808            else None
1809        )
1810
1811        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1812            rows = exp.Var(this="ONE ROW PER MATCH")
1813        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1814            text = "ALL ROWS PER MATCH"
1815            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1816                text += f" SHOW EMPTY MATCHES"
1817            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1818                text += f" OMIT EMPTY MATCHES"
1819            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1820                text += f" WITH UNMATCHED ROWS"
1821            rows = exp.Var(this=text)
1822        else:
1823            rows = None
1824
1825        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1826            text = "AFTER MATCH SKIP"
1827            if self._match_text_seq("PAST", "LAST", "ROW"):
1828                text += f" PAST LAST ROW"
1829            elif self._match_text_seq("TO", "NEXT", "ROW"):
1830                text += f" TO NEXT ROW"
1831            elif self._match_text_seq("TO", "FIRST"):
1832                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1833            elif self._match_text_seq("TO", "LAST"):
1834                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1835            after = exp.Var(this=text)
1836        else:
1837            after = None
1838
1839        if self._match_text_seq("PATTERN"):
1840            self._match_l_paren()
1841
1842            if not self._curr:
1843                self.raise_error("Expecting )", self._curr)
1844
1845            paren = 1
1846            start = self._curr
1847
1848            while self._curr and paren > 0:
1849                if self._curr.token_type == TokenType.L_PAREN:
1850                    paren += 1
1851                if self._curr.token_type == TokenType.R_PAREN:
1852                    paren -= 1
1853                end = self._prev
1854                self._advance()
1855            if paren > 0:
1856                self.raise_error("Expecting )", self._curr)
1857            pattern = exp.Var(this=self._find_sql(start, end))
1858        else:
1859            pattern = None
1860
1861        define = (
1862            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1863        )
1864        self._match_r_paren()
1865
1866        return self.expression(
1867            exp.MatchRecognize,
1868            partition_by=partition,
1869            order=order,
1870            measures=measures,
1871            rows=rows,
1872            after=after,
1873            pattern=pattern,
1874            define=define,
1875        )
1876
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY; returns None when absent.

        APPLY forms are returned wrapped in an exp.Join (LEFT side for OUTER
        APPLY); a plain LATERAL yields an exp.Lateral node.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call or
            # identifier, e.g. LATERAL some_func(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW (Hive-style): table alias optionally followed by
            # AS col1, col2, ...
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1921
1922    def _parse_join_side_and_kind(
1923        self,
1924    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1925        return (
1926            self._match(TokenType.NATURAL) and self._prev,
1927            self._match_set(self.JOIN_SIDES) and self._prev,
1928            self._match_set(self.JOIN_KINDS) and self._prev,
1929        )
1930
1931    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
1932        natural, side, kind = self._parse_join_side_and_kind()
1933
1934        if not skip_join_token and not self._match(TokenType.JOIN):
1935            return None
1936
1937        kwargs: t.Dict[
1938            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
1939        ] = {"this": self._parse_table()}
1940
1941        if natural:
1942            kwargs["natural"] = True
1943        if side:
1944            kwargs["side"] = side.text
1945        if kind:
1946            kwargs["kind"] = kind.text
1947
1948        if self._match(TokenType.ON):
1949            kwargs["on"] = self._parse_conjunction()
1950        elif self._match(TokenType.USING):
1951            kwargs["using"] = self._parse_wrapped_id_vars()
1952
1953        return self.expression(exp.Join, **kwargs)  # type: ignore
1954
1955    def _parse_index(self) -> exp.Expression:
1956        index = self._parse_id_var()
1957        self._match(TokenType.ON)
1958        self._match(TokenType.TABLE)  # hive
1959
1960        return self.expression(
1961            exp.Index,
1962            this=index,
1963            table=self.expression(exp.Table, this=self._parse_id_var()),
1964            columns=self._parse_expression(),
1965        )
1966
1967    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
1968        unique = self._match(TokenType.UNIQUE)
1969        primary = self._match_text_seq("PRIMARY")
1970        amp = self._match_text_seq("AMP")
1971        if not self._match(TokenType.INDEX):
1972            return None
1973        index = self._parse_id_var()
1974        columns = None
1975        if self._match(TokenType.L_PAREN, advance=False):
1976            columns = self._parse_wrapped_csv(self._parse_column)
1977        return self.expression(
1978            exp.Index,
1979            this=index,
1980            columns=columns,
1981            unique=unique,
1982            primary=primary,
1983            amp=amp,
1984        )
1985
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly dot-qualified table name into an exp.Table.

        The first two dots populate db and catalog; any further dots nest the
        name inside exp.Dot expressions.

        Args:
            schema: when True, do not try to parse a function first.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: the previous table becomes the db and the
                # previous db becomes the catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2006
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral/apply, UNNEST, VALUES, a subquery, or
        a (possibly aliased, hinted and sampled) table reference.

        Args:
            schema: parse the table reference as a schema target.
            alias_tokens: token types allowed to start an alias; defaults to
                TABLE_ALIAS_TOKENS.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect switch: TABLESAMPLE may come before or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # WITH (...) table hints, e.g. WITH (NOLOCK).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node and becomes the new root.
            table_sample.set("this", this)
            this = table_sample

        return this
2058
2059    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2060        if not self._match(TokenType.UNNEST):
2061            return None
2062
2063        expressions = self._parse_wrapped_csv(self._parse_column)
2064        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
2065        alias = self._parse_table_alias()
2066
2067        if alias and self.unnest_column_only:
2068            if alias.args.get("columns"):
2069                self.raise_error("Unexpected extra column alias in unnest.")
2070            alias.set("columns", [alias.this])
2071            alias.set("this", None)
2072
2073        offset = None
2074        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2075            self._match(TokenType.ALIAS)
2076            offset = self._parse_conjunction()
2077
2078        return self.expression(
2079            exp.Unnest,
2080            expressions=expressions,
2081            ordinality=ordinality,
2082            alias=alias,
2083            offset=offset,
2084        )
2085
2086    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2087        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2088        if not is_derived and not self._match(TokenType.VALUES):
2089            return None
2090
2091        expressions = self._parse_csv(self._parse_value)
2092
2093        if is_derived:
2094            self._match_r_paren()
2095
2096        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2097
2098    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
2099        if not self._match(TokenType.TABLE_SAMPLE):
2100            return None
2101
2102        method = self._parse_var()
2103        bucket_numerator = None
2104        bucket_denominator = None
2105        bucket_field = None
2106        percent = None
2107        rows = None
2108        size = None
2109        seed = None
2110
2111        self._match_l_paren()
2112
2113        if self._match(TokenType.BUCKET):
2114            bucket_numerator = self._parse_number()
2115            self._match(TokenType.OUT_OF)
2116            bucket_denominator = bucket_denominator = self._parse_number()
2117            self._match(TokenType.ON)
2118            bucket_field = self._parse_field()
2119        else:
2120            num = self._parse_number()
2121
2122            if self._match(TokenType.PERCENT):
2123                percent = num
2124            elif self._match(TokenType.ROWS):
2125                rows = num
2126            else:
2127                size = num
2128
2129        self._match_r_paren()
2130
2131        if self._match(TokenType.SEED):
2132            seed = self._parse_wrapped(self._parse_number)
2133
2134        return self.expression(
2135            exp.TableSample,
2136            method=method,
2137            bucket_numerator=bucket_numerator,
2138            bucket_denominator=bucket_denominator,
2139            bucket_field=bucket_field,
2140            percent=percent,
2141            rows=rows,
2142            size=size,
2143            seed=seed,
2144        )
2145
2146    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2147        return list(iter(self._parse_pivot, None))
2148
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause; returns None (rewinding the
        keyword) when it is not followed by a parenthesized body."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword was not a pivot clause here (e.g. an identifier).
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT bodies contain (optionally aliased) aggregate calls.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2184
2185    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2186        if not skip_where_token and not self._match(TokenType.WHERE):
2187            return None
2188
2189        return self.expression(
2190            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2191        )
2192
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING
        SETS and [WITH] ROLLUP / CUBE items until nothing more matches."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # For WITH ROLLUP / WITH CUBE the stored value is True rather than
            # a column list; the same WITH can only precede one of the two.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2224
2225    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2226        if not self._match(TokenType.GROUPING_SETS):
2227            return None
2228
2229        return self._parse_wrapped_csv(self._parse_grouping_set)
2230
2231    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2232        if self._match(TokenType.L_PAREN):
2233            grouping_set = self._parse_csv(self._parse_column)
2234            self._match_r_paren()
2235            return self.expression(exp.Tuple, expressions=grouping_set)
2236
2237        return self._parse_column()
2238
2239    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2240        if not skip_having_token and not self._match(TokenType.HAVING):
2241            return None
2242        return self.expression(exp.Having, this=self._parse_conjunction())
2243
2244    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2245        if not self._match(TokenType.QUALIFY):
2246            return None
2247        return self.expression(exp.Qualify, this=self._parse_conjunction())
2248
2249    def _parse_order(
2250        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2251    ) -> t.Optional[exp.Expression]:
2252        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2253            return this
2254
2255        return self.expression(
2256            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2257        )
2258
2259    def _parse_sort(
2260        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2261    ) -> t.Optional[exp.Expression]:
2262        if not self._match(token_type):
2263            return None
2264        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2265
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY key with its ASC/DESC and NULLS FIRST/LAST flags.

        When null ordering is not explicit, nulls_first is derived from the
        parser's null_ordering setting so the tree reflects the dialect's
        default placement of NULLs.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2287
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT n (or TOP n when `top` is True) or FETCH {FIRST|NEXT} n.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP (n) may be parenthesized; TOP only accepts a number.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this
2311
2312    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2313        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2314            return this
2315
2316        count = self._parse_number()
2317        self._match_set((TokenType.ROW, TokenType.ROWS))
2318        return self.expression(exp.Offset, this=this, expression=count)
2319
2320    def _parse_lock(self) -> t.Optional[exp.Expression]:
2321        if self._match_text_seq("FOR", "UPDATE"):
2322            return self.expression(exp.Lock, update=True)
2323        if self._match_text_seq("FOR", "SHARE"):
2324            return self.expression(exp.Lock, update=False)
2325
2326        return None
2327
2328    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2329        if not self._match_set(self.SET_OPERATIONS):
2330            return this
2331
2332        token_type = self._prev.token_type
2333
2334        if token_type == TokenType.UNION:
2335            expression = exp.Union
2336        elif token_type == TokenType.EXCEPT:
2337            expression = exp.Except
2338        else:
2339            expression = exp.Intersect
2340
2341        return self.expression(
2342            expression,
2343            this=this,
2344            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2345            expression=self._parse_set_operations(self._parse_select(nested=True)),
2346        )
2347
2348    def _parse_expression(self) -> t.Optional[exp.Expression]:
2349        return self._parse_alias(self._parse_conjunction())
2350
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of CONJUNCTION-level operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2353
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of EQUALITY-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2356
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of COMPARISON-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2359
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates (the RANGE_PARSERS table), including a
        NOT prefix and the ISNULL / NOTNULL / IS suffixes."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # NOT applies to the whole range predicate (e.g. NOT BETWEEN), so it is
        # wrapped only after the range itself has been parsed.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2382
2383    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2384        negate = self._match(TokenType.NOT)
2385        if self._match(TokenType.DISTINCT_FROM):
2386            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2387            return self.expression(klass, this=this, expression=self._parse_expression())
2388
2389        this = self.expression(
2390            exp.Is,
2391            this=this,
2392            expression=self._parse_null() or self._parse_boolean(),
2393        )
2394        return self.expression(exp.Not, this=this) if negate else this
2395
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of an IN predicate (the IN token is already consumed).

        Three forms are recognized: IN UNNEST(...), a parenthesized value list or
        subquery, and a bare field.
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery becomes the `query` arg; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2413
2414    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2415        low = self._parse_bitwise()
2416        self._match(TokenType.AND)
2417        high = self._parse_bitwise()
2418        return self.expression(exp.Between, this=this, low=low, high=high)
2419
2420    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2421        if not self._match(TokenType.ESCAPE):
2422            return this
2423        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2424
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators and shift operators, left-associatively, over terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                # `<<` arrives as two consecutive LT tokens, hence the pair match.
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                # Likewise `>>` arrives as two consecutive GT tokens.
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2447
2448    def _parse_term(self) -> t.Optional[exp.Expression]:
2449        return self._parse_tokens(self._parse_factor, self.TERM)
2450
2451    def _parse_factor(self) -> t.Optional[exp.Expression]:
2452        return self._parse_tokens(self._parse_unary, self.FACTOR)
2453
2454    def _parse_unary(self) -> t.Optional[exp.Expression]:
2455        if self._match_set(self.UNARY_PARSERS):
2456            return self.UNARY_PARSERS[self._prev.token_type](self)
2457        return self._parse_at_time_zone(self._parse_type())
2458
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals and implicit casts of the form `<type> <expr>`."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # `<type> <expr>` is treated as a cast of the expression to that type.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare type name with no args: rewind and re-parse as a plain column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this
2476
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a data type at the current position.

        Returns a DataType (or Interval/PseudoType) expression, or None when no
        type can be parsed here, in which case the token index is restored.
        When `check_func` is True, a parenthesized type that is not followed by
        a string literal is rejected, since it is more likely a function call.
        """
        index = self._index

        # Teradata allows types to be prefixed with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized type arguments, e.g. DECIMAL(10, 2) or STRUCT(a INT).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a type application; undo and bail out.
                self._retreat(index)
                return None

            self._match_r_paren()
            # A name followed by (args) could also be a function call; remember that.
            maybe_func = True

        # Postgres-style array suffix, e.g. INT[] or INT[][].
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            # Each additional [] wraps the type in another ARRAY level.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this is a subscript, not a type; undo everything.
            self._retreat(index)
            return None

        # Angle-bracket arguments for nested types, e.g. ARRAY<INT> or STRUCT<a INT>.
        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values, e.g. ARRAY<INT>[1, 2] or ARRAY<INT>(1, 2).
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        # Normalize timestamp/time types according to WITH/WITHOUT TIME ZONE suffixes.
        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call interpretation.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat the parenthesized form as a
                # function call rather than a type, and rewind completely.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2585
2586    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2587        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2588            return self._parse_types()
2589
2590        this = self._parse_id_var()
2591        self._match(TokenType.COLON)
2592        data_type = self._parse_types()
2593
2594        if not data_type:
2595            return None
2596        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2597
2598    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2599        if not self._match(TokenType.AT_TIME_ZONE):
2600            return this
2601        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2602
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, with ::casts, arrows, and brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast, e.g. x::INT.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take the next token as a literal key.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot qualification: table.column, db.table.column, etc.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: what was parsed as the column name is
                # actually a table (or db/catalog) qualifier of the new field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2651
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a parenthesized expression/tuple, or a subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Several comma-separated expressions form a tuple, e.g. (1, 2).
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Carry comments attached to the opening paren onto the resulting node.
            if this and comments:
                this.comments = comments

            return this

        return None
2698
2699    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
2700        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2701
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position, if there is one.

        `functions` optionally overrides the name -> builder mapping, defaulting
        to `self.FUNCTIONS`. Returns None if the current tokens do not form a
        function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs that look like functions but take no parentheses.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # Predicates such as EXISTS(SELECT ...) wrap a subquery directly.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an Anonymous call node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
2763
2764    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
2765        return self._parse_column_def(self._parse_id_var())
2766
2767    def _parse_user_defined_function(
2768        self, kind: t.Optional[TokenType] = None
2769    ) -> t.Optional[exp.Expression]:
2770        this = self._parse_id_var()
2771
2772        while self._match(TokenType.DOT):
2773            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2774
2775        if not self._match(TokenType.L_PAREN):
2776            return this
2777
2778        expressions = self._parse_csv(self._parse_function_parameter)
2779        self._match_r_paren()
2780        return self.expression(
2781            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2782        )
2783
2784    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2785        literal = self._parse_primary()
2786        if literal:
2787            return self.expression(exp.Introducer, this=token.text, expression=literal)
2788
2789        return self.expression(exp.Identifier, this=token.text)
2790
2791    def _parse_national(self, token: Token) -> exp.Expression:
2792        return self.expression(exp.National, this=exp.Literal.string(token.text))
2793
2794    def _parse_session_parameter(self) -> exp.Expression:
2795        kind = None
2796        this = self._parse_id_var() or self._parse_primary()
2797
2798        if this and self._match(TokenType.DOT):
2799            kind = this.name
2800            this = self._parse_var() or self._parse_primary()
2801
2802        return self.expression(exp.SessionParameter, this=this, kind=kind)
2803
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1`) or fall back to a regular function argument.

        Also consumes DISTINCT, IGNORE/RESPECT NULLS, ORDER BY and LIMIT, which
        may appear inside a function's argument list.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default behavior, so the keyword is consumed and dropped.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))
2835
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.

        If the parens open a SELECT instead, the position is rewound and `this`
        is returned untouched.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
2848
2849    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2850        kind = self._parse_types()
2851
2852        if self._match_text_seq("FOR", "ORDINALITY"):
2853            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2854
2855        constraints = []
2856        while True:
2857            constraint = self._parse_column_constraint()
2858            if not constraint:
2859                break
2860            constraints.append(constraint)
2861
2862        if not kind and not constraints:
2863            return this
2864
2865        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2866
2867    def _parse_auto_increment(self) -> exp.Expression:
2868        start = None
2869        increment = None
2870
2871        if self._match(TokenType.L_PAREN, advance=False):
2872            args = self._parse_wrapped_csv(self._parse_bitwise)
2873            start = seq_get(args, 0)
2874            increment = seq_get(args, 1)
2875        elif self._match_text_seq("START"):
2876            start = self._parse_bitwise()
2877            self._match_text_seq("INCREMENT")
2878            increment = self._parse_bitwise()
2879
2880        if start and increment:
2881            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2882
2883        return exp.AutoIncrementColumnConstraint()
2884
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        # Optional sequence options: (START WITH n INCREMENT BY n MINVALUE n ... [NO] CYCLE).
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
2911
2912    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
2913        if self._match_text_seq("NULL"):
2914            return self.expression(exp.NotNullColumnConstraint)
2915        if self._match_text_seq("CASESPECIFIC"):
2916            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
2917        return None
2918
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single column-level constraint, optionally named via CONSTRAINT."""
        this = self._parse_references()
        if this:
            return this

        # An explicit CONSTRAINT keyword introduces a constraint name.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
2935
2936    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2937        if not self._match(TokenType.CONSTRAINT):
2938            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
2939
2940        this = self._parse_id_var()
2941        expressions = []
2942
2943        while True:
2944            constraint = self._parse_unnamed_constraint() or self._parse_function()
2945            if not constraint:
2946                break
2947            expressions.append(constraint)
2948
2949        return self.expression(exp.Constraint, this=this, expressions=expressions)
2950
2951    def _parse_unnamed_constraint(
2952        self, constraints: t.Optional[t.Collection[str]] = None
2953    ) -> t.Optional[exp.Expression]:
2954        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
2955            return None
2956
2957        constraint = self._prev.text.upper()
2958        if constraint not in self.CONSTRAINT_PARSERS:
2959            self.raise_error(f"No parser found for schema constraint {constraint}.")
2960
2961        return self.CONSTRAINT_PARSERS[constraint](self)
2962
2963    def _parse_unique(self) -> exp.Expression:
2964        if not self._match(TokenType.L_PAREN, advance=False):
2965            return self.expression(exp.UniqueColumnConstraint)
2966        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
2967
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...).

        Options are returned as plain strings, in the order they appear.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3004
3005    def _parse_references(self) -> t.Optional[exp.Expression]:
3006        if not self._match(TokenType.REFERENCES):
3007            return None
3008
3009        expressions = None
3010        this = self._parse_id_var()
3011
3012        if self._match(TokenType.L_PAREN, advance=False):
3013            expressions = self._parse_wrapped_id_vars()
3014
3015        options = self._parse_key_constraint_options()
3016        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3017
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint body: columns, REFERENCES, and ON actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token actions such as CASCADE or RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3043
3044    def _parse_primary_key(self) -> exp.Expression:
3045        desc = (
3046            self._match_set((TokenType.ASC, TokenType.DESC))
3047            and self._prev.token_type == TokenType.DESC
3048        )
3049
3050        if not self._match(TokenType.L_PAREN, advance=False):
3051            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3052
3053        expressions = self._parse_wrapped_id_vars()
3054        options = self._parse_key_constraint_options()
3055        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3056
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` (array/subscript) or `{...}` (struct) following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Slice with no lower bound, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Brackets may chain, e.g. x[0][1], so recurse on the result.
        return self._parse_bracket(this)
3085
3086    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3087        if self._match(TokenType.COLON):
3088            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3089        return this
3090
3091    def _parse_case(self) -> t.Optional[exp.Expression]:
3092        ifs = []
3093        default = None
3094
3095        expression = self._parse_conjunction()
3096
3097        while self._match(TokenType.WHEN):
3098            this = self._parse_conjunction()
3099            self._match(TokenType.THEN)
3100            then = self._parse_conjunction()
3101            ifs.append(self.expression(exp.If, this=this, true=then))
3102
3103        if self._match(TokenType.ELSE):
3104            default = self._parse_conjunction()
3105
3106        if not self._match(TokenType.END):
3107            self.raise_error("Expected END after CASE", self._prev)
3108
3109        return self._parse_window(
3110            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3111        )
3112
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3128
3129    def _parse_extract(self) -> exp.Expression:
3130        this = self._parse_function() or self._parse_var() or self._parse_type()
3131
3132        if self._match(TokenType.FROM):
3133            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3134
3135        if not self._match(TokenType.COMMA):
3136            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3137
3138        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3139
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the inside of CAST(expr AS type); builds TryCast when `strict` is False."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit character set, e.g. CAST(x AS CHAR CHARACTER SET utf8).
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3155
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG arguments into an exp.GroupConcat node.

        Handles both the Postgres form with a trailing ORDER BY inside the
        parens and the WITHIN GROUP (ORDER BY ...) form, so the result can be
        transpiled to MySQL / SQLite GROUP_CONCAT.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        # Remember the cursor so we can rewind if WITHIN GROUP doesn't follow.
        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3184
3185    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3186        to: t.Optional[exp.Expression]
3187        this = self._parse_column()
3188
3189        if self._match(TokenType.USING):
3190            to = self.expression(exp.CharacterSet, this=self._parse_var())
3191        elif self._match(TokenType.COMMA):
3192            to = self._parse_types()
3193        else:
3194            to = None
3195
3196        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3197
3198    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3199        args = self._parse_csv(self._parse_bitwise)
3200
3201        if self._match(TokenType.IN):
3202            return self.expression(
3203                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3204            )
3205
3206        if haystack_first:
3207            haystack = seq_get(args, 0)
3208            needle = seq_get(args, 1)
3209        else:
3210            needle = seq_get(args, 0)
3211            haystack = seq_get(args, 1)
3212
3213        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3214
3215        self.validate_expression(this, args)
3216
3217        return this
3218
3219    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3220        args = self._parse_csv(self._parse_table)
3221        return exp.JoinHint(this=func_name.upper(), expressions=args)
3222
3223    def _parse_substring(self) -> exp.Expression:
3224        # Postgres supports the form: substring(string [from int] [for int])
3225        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3226
3227        args = self._parse_csv(self._parse_bitwise)
3228
3229        if self._match(TokenType.FROM):
3230            args.append(self._parse_bitwise())
3231            if self._match(TokenType.FOR):
3232                args.append(self._parse_bitwise())
3233
3234        this = exp.Substring.from_arg_list(args)
3235        self.validate_expression(this, args)
3236
3237        return this
3238
3239    def _parse_trim(self) -> exp.Expression:
3240        # https://www.w3resource.com/sql/character-functions/trim.php
3241        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3242
3243        position = None
3244        collation = None
3245
3246        if self._match_set(self.TRIM_TYPES):
3247            position = self._prev.text.upper()
3248
3249        expression = self._parse_term()
3250        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3251            this = self._parse_term()
3252        else:
3253            this = expression
3254            expression = None
3255
3256        if self._match(TokenType.COLLATE):
3257            collation = self._parse_term()
3258
3259        return self.expression(
3260            exp.Trim,
3261            this=this,
3262            position=position,
3263            expression=expression,
3264            collation=collation,
3265        )
3266
3267    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3268        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3269
3270    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3271        return self._parse_window(self._parse_id_var(), alias=True)
3272
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window suffix of an expression: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER (...) specification.

        When `alias` is True, parse a named-window definition (`name AS (spec)`)
        instead of requiring OVER. Returns `this` (possibly wrapped) unchanged
        when no window syntax follows.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # `OVER window_name` -- a reference to a previously named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE BETWEEN <start> AND <end>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3346
3347    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3348        self._match(TokenType.BETWEEN)
3349
3350        return {
3351            "value": (
3352                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3353            )
3354            or self._parse_bitwise(),
3355            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3356        }
3357
3358    def _parse_alias(
3359        self, this: t.Optional[exp.Expression], explicit: bool = False
3360    ) -> t.Optional[exp.Expression]:
3361        any_token = self._match(TokenType.ALIAS)
3362
3363        if explicit and not any_token:
3364            return this
3365
3366        if self._match(TokenType.L_PAREN):
3367            aliases = self.expression(
3368                exp.Aliases,
3369                this=this,
3370                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3371            )
3372            self._match_r_paren(aliases)
3373            return aliases
3374
3375        alias = self._parse_id_var(any_token)
3376
3377        if alias:
3378            return self.expression(exp.Alias, this=this, alias=alias)
3379
3380        return this
3381
3382    def _parse_id_var(
3383        self,
3384        any_token: bool = True,
3385        tokens: t.Optional[t.Collection[TokenType]] = None,
3386        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3387    ) -> t.Optional[exp.Expression]:
3388        identifier = self._parse_identifier()
3389
3390        if identifier:
3391            return identifier
3392
3393        prefix = ""
3394
3395        if prefix_tokens:
3396            while self._match_set(prefix_tokens):
3397                prefix += self._prev.text
3398
3399        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3400            quoted = self._prev.token_type == TokenType.STRING
3401            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3402
3403        return None
3404
3405    def _parse_string(self) -> t.Optional[exp.Expression]:
3406        if self._match(TokenType.STRING):
3407            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3408        return self._parse_placeholder()
3409
3410    def _parse_number(self) -> t.Optional[exp.Expression]:
3411        if self._match(TokenType.NUMBER):
3412            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3413        return self._parse_placeholder()
3414
3415    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3416        if self._match(TokenType.IDENTIFIER):
3417            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3418        return self._parse_placeholder()
3419
3420    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3421        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3422            return self.expression(exp.Var, this=self._prev.text)
3423        return self._parse_placeholder()
3424
3425    def _advance_any(self) -> t.Optional[Token]:
3426        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3427            self._advance()
3428            return self._prev
3429        return None
3430
3431    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3432        return self._parse_var() or self._parse_string()
3433
3434    def _parse_null(self) -> t.Optional[exp.Expression]:
3435        if self._match(TokenType.NULL):
3436            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3437        return None
3438
3439    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3440        if self._match(TokenType.TRUE):
3441            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3442        if self._match(TokenType.FALSE):
3443            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3444        return None
3445
3446    def _parse_star(self) -> t.Optional[exp.Expression]:
3447        if self._match(TokenType.STAR):
3448            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3449        return None
3450
3451    def _parse_parameter(self) -> exp.Expression:
3452        wrapped = self._match(TokenType.L_BRACE)
3453        this = self._parse_var() or self._parse_primary()
3454        self._match(TokenType.R_BRACE)
3455        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3456
3457    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3458        if self._match_set(self.PLACEHOLDER_PARSERS):
3459            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3460            if placeholder:
3461                return placeholder
3462            self._advance(-1)
3463        return None
3464
3465    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3466        if not self._match(TokenType.EXCEPT):
3467            return None
3468        if self._match(TokenType.L_PAREN, advance=False):
3469            return self._parse_wrapped_csv(self._parse_column)
3470        return self._parse_csv(self._parse_column)
3471
3472    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3473        if not self._match(TokenType.REPLACE):
3474            return None
3475        if self._match(TokenType.L_PAREN, advance=False):
3476            return self._parse_wrapped_csv(self._parse_expression)
3477        return self._parse_csv(self._parse_expression)
3478
3479    def _parse_csv(
3480        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3481    ) -> t.List[t.Optional[exp.Expression]]:
3482        parse_result = parse_method()
3483        items = [parse_result] if parse_result is not None else []
3484
3485        while self._match(sep):
3486            if parse_result and self._prev_comments:
3487                parse_result.comments = self._prev_comments
3488
3489            parse_result = parse_method()
3490            if parse_result is not None:
3491                items.append(parse_result)
3492
3493        return items
3494
3495    def _parse_tokens(
3496        self, parse_method: t.Callable, expressions: t.Dict
3497    ) -> t.Optional[exp.Expression]:
3498        this = parse_method()
3499
3500        while self._match_set(expressions):
3501            this = self.expression(
3502                expressions[self._prev.token_type],
3503                this=this,
3504                comments=self._prev_comments,
3505                expression=parse_method(),
3506            )
3507
3508        return this
3509
    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var)
3512
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3517
3518    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3519        self._match_l_paren()
3520        parse_result = parse_method()
3521        self._match_r_paren()
3522        return parse_result
3523
    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full SELECT if present, otherwise a scalar expression."""
        return self._parse_select() or self._parse_expression()
3526
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CTAS), allowing set operations."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )
3531
3532    def _parse_transaction(self) -> exp.Expression:
3533        this = None
3534        if self._match_texts(self.TRANSACTION_KIND):
3535            this = self._prev.text
3536
3537        self._match_texts({"TRANSACTION", "WORK"})
3538
3539        modes = []
3540        while True:
3541            mode = []
3542            while self._match(TokenType.VAR):
3543                mode.append(self._prev.text)
3544
3545            if mode:
3546                modes.append(" ".join(mode))
3547            if not self._match(TokenType.COMMA):
3548                break
3549
3550        return self.expression(exp.Transaction, this=this, modes=modes)
3551
3552    def _parse_commit_or_rollback(self) -> exp.Expression:
3553        chain = None
3554        savepoint = None
3555        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3556
3557        self._match_texts({"TRANSACTION", "WORK"})
3558
3559        if self._match_text_seq("TO"):
3560            self._match_text_seq("SAVEPOINT")
3561            savepoint = self._parse_id_var()
3562
3563        if self._match(TokenType.AND):
3564            chain = not self._match_text_seq("NO")
3565            self._match_text_seq("CHAIN")
3566
3567        if is_rollback:
3568            return self.expression(exp.Rollback, savepoint=savepoint)
3569        return self.expression(exp.Commit, chain=chain)
3570
3571    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3572        if not self._match_text_seq("ADD"):
3573            return None
3574
3575        self._match(TokenType.COLUMN)
3576        exists_column = self._parse_exists(not_=True)
3577        expression = self._parse_column_def(self._parse_field(any_token=True))
3578
3579        if expression:
3580            expression.set("exists", exists_column)
3581
3582        return expression
3583
3584    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3585        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3586
3587    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3588    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3589        return self.expression(
3590            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3591        )
3592
3593    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3594        this = None
3595        kind = self._prev.token_type
3596
3597        if kind == TokenType.CONSTRAINT:
3598            this = self._parse_id_var()
3599
3600            if self._match_text_seq("CHECK"):
3601                expression = self._parse_wrapped(self._parse_conjunction)
3602                enforced = self._match_text_seq("ENFORCED")
3603
3604                return self.expression(
3605                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3606                )
3607
3608        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3609            expression = self._parse_foreign_key()
3610        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3611            expression = self._parse_primary_key()
3612
3613        return self.expression(exp.AddConstraint, this=this, expression=expression)
3614
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement.

        Only ALTER TABLE is parsed structurally; any other ALTER target is kept
        verbatim as an exp.Command.
        """
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # Remember the cursor: some branches rewind so the action sub-parsers
        # can re-consume their leading keyword ("ADD" / "DROP").
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE ... [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3667
3668    def _parse_show(self) -> t.Optional[exp.Expression]:
3669        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3670        if parser:
3671            return parser(self)
3672        self._advance()
3673        return self.expression(exp.Show, this=self._prev.text.upper())
3674
3675    def _default_parse_set_item(self) -> exp.Expression:
3676        return self.expression(
3677            exp.SetItem,
3678            this=self._parse_statement(),
3679        )
3680
3681    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3682        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3683        return parser(self) if parser else self._default_parse_set_item()
3684
3685    def _parse_merge(self) -> exp.Expression:
3686        self._match(TokenType.INTO)
3687        target = self._parse_table()
3688
3689        self._match(TokenType.USING)
3690        using = self._parse_table()
3691
3692        self._match(TokenType.ON)
3693        on = self._parse_conjunction()
3694
3695        whens = []
3696        while self._match(TokenType.WHEN):
3697            this = self._parse_conjunction()
3698            self._match(TokenType.THEN)
3699
3700            if self._match(TokenType.INSERT):
3701                _this = self._parse_star()
3702                if _this:
3703                    then = self.expression(exp.Insert, this=_this)
3704                else:
3705                    then = self.expression(
3706                        exp.Insert,
3707                        this=self._parse_value(),
3708                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3709                    )
3710            elif self._match(TokenType.UPDATE):
3711                expressions = self._parse_star()
3712                if expressions:
3713                    then = self.expression(exp.Update, expressions=expressions)
3714                else:
3715                    then = self.expression(
3716                        exp.Update,
3717                        expressions=self._match(TokenType.SET)
3718                        and self._parse_csv(self._parse_equality),
3719                    )
3720            elif self._match(TokenType.DELETE):
3721                then = self.expression(exp.Var, this=self._prev.text)
3722
3723            whens.append(self.expression(exp.When, this=this, then=then))
3724
3725        return self.expression(
3726            exp.Merge,
3727            this=target,
3728            using=using,
3729            on=on,
3730            expressions=whens,
3731        )
3732
3733    def _parse_set(self) -> exp.Expression:
3734        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3735
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and keep the raw SQL verbatim as a Command."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))
3740
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` using upcoming token texts and return the matching parser.

        On success the cursor is left after the consumed keywords; on failure it
        is fully restored.
        """
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Not even a prefix of any registered key -- give up.
                break
            if result == 2:
                # Exact key match -- look up its parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
3760
3761    def _match(self, token_type, advance=True):
3762        if not self._curr:
3763            return None
3764
3765        if self._curr.token_type == token_type:
3766            if advance:
3767                self._advance()
3768            return True
3769
3770        return None
3771
3772    def _match_set(self, types):
3773        if not self._curr:
3774            return None
3775
3776        if self._curr.token_type in types:
3777            self._advance()
3778            return True
3779
3780        return None
3781
3782    def _match_pair(self, token_type_a, token_type_b, advance=True):
3783        if not self._curr or not self._next:
3784            return None
3785
3786        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3787            if advance:
3788                self._advance(2)
3789            return True
3790
3791        return None
3792
    def _match_l_paren(self, expression=None):
        """Require '('; attach any preceding comments to `expression`."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3798
    def _match_r_paren(self, expression=None):
        """Require ')'; attach any preceding comments to `expression`."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3804
3805    def _match_texts(self, texts):
3806        if self._curr and self._curr.text.upper() in texts:
3807            self._advance()
3808            return True
3809        return False
3810
3811    def _match_text_seq(self, *texts, advance=True):
3812        index = self._index
3813        for text in texts:
3814            if self._curr and self._curr.text.upper() == text:
3815                self._advance()
3816            else:
3817                self._retreat(index)
3818                return False
3819
3820        if not advance:
3821            self._retreat(index)
3822
3823        return True
3824
3825    def _replace_columns_with_dots(self, this):
3826        if isinstance(this, exp.Dot):
3827            exp.replace_children(this, self._replace_columns_with_dots)
3828        elif isinstance(this, exp.Column):
3829            exp.replace_children(this, self._replace_columns_with_dots)
3830            table = this.args.get("table")
3831            this = (
3832                self.expression(exp.Dot, this=table, expression=this.this)
3833                if table
3834                else self.expression(exp.Var, this=this.name)
3835            )
3836        elif isinstance(this, exp.Identifier):
3837            this = self.expression(exp.Var, this=this.name)
3838        return this
3839
3840    def _replace_lambda(self, node, lambda_variables):
3841        if isinstance(node, exp.Column):
3842            if node.name in lambda_variables:
3843                return node.this
3844        return node
def parse_var_map(args):
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  65
    # Function name -> builder. The base mapping is generated from every
    # registered expression class; a few transpilation helpers and aliases
    # (e.g. IFNULL -> Coalesce) are layered on top.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }

    # Functions that may be written without trailing parentheses.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Type tokens that can contain other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # Every token that can start a data type.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery, e.g. ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that can never be parsed as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 159
    # Keyword tokens that may nevertheless be used as identifiers.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COLUMN,
        TokenType.COMMAND,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SCHEMA,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TABLE,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.PROCEDURE,
        TokenType.VIEW,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Tokens usable as a table alias; removes tokens that would be ambiguous
    # after a table reference (e.g. LEFT starting a join).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # In UPDATE statements SET must not be swallowed as an alias.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid TRIM(<type> ...) specifiers.
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 250
    # Tokens that may directly precede a "(" to form a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, one per precedence level (see the corresponding
    # _parse_* methods which fold these left-associatively).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like type tokens (used when disambiguating TIME/TIMESTAMP syntax).
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that combine two queries into one.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda arrow styles: "->" builds a Lambda node, "=>" a Kwarg node.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
 364
    # Postfix operators that can follow a column: casts and JSON/JSONB
    # extraction. DOT maps to None because it is handled inline by the caller.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression type -> parser; this is the dispatch table used by parse_into.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> parser for the whole statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token -> expression builder (receives the token itself).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder syntaxes (?, @param, :1, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/membership operators that take an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }
 511
    # Property keyword -> parser (table/schema properties, e.g. in CREATE).
    # Several entries inspect self._prev because the keyword itself carries
    # meaning (e.g. "NO" prefixes, "DEFAULT" variants).
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
    }

    # Column-constraint keyword -> parser.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without parentheses (CASE, IF, ANY).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose arguments use non-standard syntax and therefore need
    # dedicated parsing logic (e.g. CAST(x AS type), TRIM(BOTH ... FROM ...)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }
 639
    # Clause name -> parser for the modifiers that can trail a query.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Dialect-specific SHOW/SET command parsers; empty here, populated by
    # subclasses (the _Parser metaclass builds tries from these keys).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression classes whose args can receive query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds accepted by CREATE/DROP statements.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # BEGIN <kind> TRANSACTION modes.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> conflict-handling keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # ROWS would be ambiguous after a window alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Passed to _parse_cast/_parse_convert; the TRY_* variants pass False.
    STRICT_CAST = True

    # All per-instance attributes; __slots__ avoids a per-instance __dict__.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 700
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store configuration and initialize all parsing state via reset()."""
        # Note: the default error level is IMMEDIATE (raise on first error).
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
 719
    def reset(self):
        """Clear all per-parse state so the same instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
 729
 730    def parse(
 731        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 732    ) -> t.List[t.Optional[exp.Expression]]:
 733        """
 734        Parses a list of tokens and returns a list of syntax trees, one tree
 735        per parsed SQL statement.
 736
 737        Args:
 738            raw_tokens: the list of tokens.
 739            sql: the original SQL string, used to produce helpful debug messages.
 740
 741        Returns:
 742            The list of syntax trees.
 743        """
 744        return self._parse(
 745            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 746        )
 747
 748    def parse_into(
 749        self,
 750        expression_types: exp.IntoType,
 751        raw_tokens: t.List[Token],
 752        sql: t.Optional[str] = None,
 753    ) -> t.List[t.Optional[exp.Expression]]:
 754        """
 755        Parses a list of tokens into a given Expression type. If a collection of Expression
 756        types is given instead, this method will try to parse the token list into each one
 757        of them, stopping at the first for which the parsing succeeds.
 758
 759        Args:
 760            expression_types: the expression type(s) to try and parse the token list into.
 761            raw_tokens: the list of tokens.
 762            sql: the original SQL string, used to produce helpful debug messages.
 763
 764        Returns:
 765            The target Expression.
 766        """
 767        errors = []
 768        for expression_type in ensure_collection(expression_types):
 769            parser = self.EXPRESSION_PARSERS.get(expression_type)
 770            if not parser:
 771                raise TypeError(f"No parser registered for {expression_type}")
 772            try:
 773                return self._parse(parser, raw_tokens, sql)
 774            except ParseError as e:
 775                e.errors[0]["into_expression"] = expression_type
 776                errors.append(e)
 777        raise ParseError(
 778            f"Failed to parse into {expression_types}",
 779            errors=merge_errors(errors),
 780        ) from errors[-1]
 781
 782    def _parse(
 783        self,
 784        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 785        raw_tokens: t.List[Token],
 786        sql: t.Optional[str] = None,
 787    ) -> t.List[t.Optional[exp.Expression]]:
 788        self.reset()
 789        self.sql = sql or ""
 790        total = len(raw_tokens)
 791        chunks: t.List[t.List[Token]] = [[]]
 792
 793        for i, token in enumerate(raw_tokens):
 794            if token.token_type == TokenType.SEMICOLON:
 795                if i < total - 1:
 796                    chunks.append([])
 797            else:
 798                chunks[-1].append(token)
 799
 800        expressions = []
 801
 802        for tokens in chunks:
 803            self._index = -1
 804            self._tokens = tokens
 805            self._advance()
 806
 807            expressions.append(parse_method(self))
 808
 809            if self._index < len(self._tokens):
 810                self.raise_error("Invalid expression / Unexpected token")
 811
 812            self.check_errors()
 813
 814        return expressions
 815
 816    def check_errors(self) -> None:
 817        """
 818        Logs or raises any found errors, depending on the chosen error level setting.
 819        """
 820        if self.error_level == ErrorLevel.WARN:
 821            for error in self.errors:
 822                logger.error(str(error))
 823        elif self.error_level == ErrorLevel.RAISE and self.errors:
 824            raise ParseError(
 825                concat_messages(self.errors, self.max_errors),
 826                errors=merge_errors(self.errors),
 827            )
 828
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token the error is anchored to; falls back to the current token,
                then the previous one, then an empty string token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The ANSI escape codes underline the offending SQL fragment in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 856
 857    def expression(
 858        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
 859    ) -> exp.Expression:
 860        """
 861        Creates a new, validated Expression.
 862
 863        Args:
 864            exp_class: the expression class to instantiate.
 865            comments: an optional list of comments to attach to the expression.
 866            kwargs: the arguments to set for the expression along with their respective values.
 867
 868        Returns:
 869            The target expression.
 870        """
 871        instance = exp_class(**kwargs)
 872        if self._prev_comments:
 873            instance.comments = self._prev_comments
 874            self._prev_comments = None
 875        if comments:
 876            instance.comments = comments
 877        self.validate_expression(instance)
 878        return instance
 879
 880    def validate_expression(
 881        self, expression: exp.Expression, args: t.Optional[t.List] = None
 882    ) -> None:
 883        """
 884        Validates an already instantiated expression, making sure that all its mandatory arguments
 885        are set.
 886
 887        Args:
 888            expression: the expression to validate.
 889            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 890        """
 891        if self.error_level == ErrorLevel.IGNORE:
 892            return
 893
 894        for error_message in expression.error_messages(args):
 895            self.raise_error(error_message)
 896
 897    def _find_sql(self, start: Token, end: Token) -> str:
 898        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]
 899
 900    def _find_token(self, token: Token) -> int:
 901        line = 1
 902        col = 1
 903        index = 0
 904
 905        while line < token.line or col < token.col:
 906            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
 907                line += 1
 908                col = 1
 909            else:
 910                col += 1
 911            index += 1
 912
 913        return index
 914
 915    def _advance(self, times: int = 1) -> None:
 916        self._index += times
 917        self._curr = seq_get(self._tokens, self._index)
 918        self._next = seq_get(self._tokens, self._index + 1)
 919        if self._index > 0:
 920            self._prev = self._tokens[self._index - 1]
 921            self._prev_comments = self._prev.comments
 922        else:
 923            self._prev = None
 924            self._prev_comments = None
 925
 926    def _retreat(self, index: int) -> None:
 927        self._advance(index - self._index)
 928
 929    def _parse_command(self) -> exp.Expression:
 930        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 931
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, trying dedicated parsers before falling back to expressions."""
        if self._curr is None:
            return None

        # Dedicated statement parsers (CREATE, DROP, INSERT, ...) take precedence.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens registered as commands are parsed opaquely.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression
 947
    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """
        Parses a DROP statement.

        Args:
            default_kind: kind to assume when no creatable keyword follows DROP.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                # Unknown drop target: fall back to an opaque command.
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )
 968
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Matches an `IF [NOT] EXISTS` clause; truthy only when the whole clause is present.
        The and-chain short-circuits, so no tokens past the first failed match are consumed.
        """
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
 975
 976    def _parse_create(self) -> t.Optional[exp.Expression]:
 977        start = self._prev
 978        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
 979            TokenType.OR, TokenType.REPLACE
 980        )
 981        set_ = self._match(TokenType.SET)  # Teradata
 982        multiset = self._match_text_seq("MULTISET")  # Teradata
 983        global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY")  # Teradata
 984        volatile = self._match(TokenType.VOLATILE)  # Teradata
 985        temporary = self._match(TokenType.TEMPORARY)
 986        transient = self._match_text_seq("TRANSIENT")
 987        external = self._match_text_seq("EXTERNAL")
 988        unique = self._match(TokenType.UNIQUE)
 989        materialized = self._match(TokenType.MATERIALIZED)
 990
 991        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
 992            self._match(TokenType.TABLE)
 993
 994        properties = None
 995        create_token = self._match_set(self.CREATABLES) and self._prev
 996
 997        if not create_token:
 998            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
 999            create_token = self._match_set(self.CREATABLES) and self._prev
1000
1001            if not properties or not create_token:
1002                return self._parse_as_command(start)
1003
1004        exists = self._parse_exists(not_=True)
1005        this = None
1006        expression = None
1007        data = None
1008        statistics = None
1009        no_primary_index = None
1010        indexes = None
1011        no_schema_binding = None
1012        begin = None
1013
1014        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1015            this = self._parse_user_defined_function(kind=create_token.token_type)
1016            properties = self._parse_properties()
1017
1018            self._match(TokenType.ALIAS)
1019            begin = self._match(TokenType.BEGIN)
1020            return_ = self._match_text_seq("RETURN")
1021            expression = self._parse_statement()
1022
1023            if return_:
1024                expression = self.expression(exp.Return, this=expression)
1025        elif create_token.token_type == TokenType.INDEX:
1026            this = self._parse_index()
1027        elif create_token.token_type in (
1028            TokenType.TABLE,
1029            TokenType.VIEW,
1030            TokenType.SCHEMA,
1031        ):
1032            table_parts = self._parse_table_parts(schema=True)
1033
1034            # exp.Properties.Location.POST_NAME
1035            if self._match(TokenType.COMMA):
1036                temp_properties = self._parse_properties(before=True)
1037                if properties and temp_properties:
1038                    properties.expressions.append(temp_properties.expressions)
1039                elif temp_properties:
1040                    properties = temp_properties
1041
1042            this = self._parse_schema(this=table_parts)
1043
1044            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1045            temp_properties = self._parse_properties()
1046            if properties and temp_properties:
1047                properties.expressions.append(temp_properties.expressions)
1048            elif temp_properties:
1049                properties = temp_properties
1050
1051            self._match(TokenType.ALIAS)
1052
1053            # exp.Properties.Location.POST_ALIAS
1054            if not (
1055                self._match(TokenType.SELECT, advance=False)
1056                or self._match(TokenType.WITH, advance=False)
1057                or self._match(TokenType.L_PAREN, advance=False)
1058            ):
1059                temp_properties = self._parse_properties()
1060                if properties and temp_properties:
1061                    properties.expressions.append(temp_properties.expressions)
1062                elif temp_properties:
1063                    properties = temp_properties
1064
1065            expression = self._parse_ddl_select()
1066
1067            if create_token.token_type == TokenType.TABLE:
1068                if self._match_text_seq("WITH", "DATA"):
1069                    data = True
1070                elif self._match_text_seq("WITH", "NO", "DATA"):
1071                    data = False
1072
1073                if self._match_text_seq("AND", "STATISTICS"):
1074                    statistics = True
1075                elif self._match_text_seq("AND", "NO", "STATISTICS"):
1076                    statistics = False
1077
1078                no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX")
1079
1080                indexes = []
1081                while True:
1082                    index = self._parse_create_table_index()
1083
1084                    # exp.Properties.Location.POST_INDEX
1085                    if self._match(TokenType.PARTITION_BY, advance=False):
1086                        temp_properties = self._parse_properties()
1087                        if properties and temp_properties:
1088                            properties.expressions.append(temp_properties.expressions)
1089                        elif temp_properties:
1090                            properties = temp_properties
1091
1092                    if not index:
1093                        break
1094                    else:
1095                        indexes.append(index)
1096            elif create_token.token_type == TokenType.VIEW:
1097                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1098                    no_schema_binding = True
1099
1100        return self.expression(
1101            exp.Create,
1102            this=this,
1103            kind=create_token.text,
1104            expression=expression,
1105            set=set_,
1106            multiset=multiset,
1107            global_temporary=global_temporary,
1108            volatile=volatile,
1109            exists=exists,
1110            properties=properties,
1111            temporary=temporary,
1112            transient=transient,
1113            external=external,
1114            replace=replace,
1115            unique=unique,
1116            materialized=materialized,
1117            data=data,
1118            statistics=statistics,
1119            no_primary_index=no_primary_index,
1120            indexes=indexes,
1121            no_schema_binding=no_schema_binding,
1122            begin=begin,
1123        )
1124
1125    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1126        self._match(TokenType.COMMA)
1127
1128        # parsers look to _prev for no/dual/default, so need to consume first
1129        self._match_text_seq("NO")
1130        self._match_text_seq("DUAL")
1131        self._match_text_seq("DEFAULT")
1132
1133        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1134            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1135
1136        return None
1137
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property, trying keyword-specific parsers before generic assignments."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment; peek (advance=False) before committing.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1161
1162    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1163        self._match(TokenType.EQ)
1164        self._match(TokenType.ALIAS)
1165        return self.expression(
1166            exp_class,
1167            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1168        )
1169
1170    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1171        properties = []
1172
1173        while True:
1174            if before:
1175                identified_property = self._parse_property_before()
1176            else:
1177                identified_property = self._parse_property()
1178
1179            if not identified_property:
1180                break
1181            for p in ensure_collection(identified_property):
1182                properties.append(p)
1183
1184        if properties:
1185            return self.expression(exp.Properties, expressions=properties)
1186
1187        return None
1188
1189    def _parse_fallback(self, no=False) -> exp.Expression:
1190        self._match_text_seq("FALLBACK")
1191        return self.expression(
1192            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1193        )
1194
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the property (or parenthesized property list) following a WITH keyword."""
        # WITH (prop, prop, ...) yields a list of properties.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        # Teradata: disambiguate journaling vs isolated loading by lookahead.
        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()
1208
1209    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1210    def _parse_definer(self) -> t.Optional[exp.Expression]:
1211        self._match(TokenType.EQ)
1212
1213        user = self._parse_id_var()
1214        self._match(TokenType.PARAMETER)
1215        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1216
1217        if not user or not host:
1218            return None
1219
1220        return exp.DefinerProperty(this=f"{user}@{host}")
1221
1222    def _parse_withjournaltable(self) -> exp.Expression:
1223        self._match_text_seq("WITH", "JOURNAL", "TABLE")
1224        self._match(TokenType.EQ)
1225        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1226
    def _parse_log(self, no=False) -> exp.Expression:
        """Parses a Teradata `[NO] LOG` property; `no` is set by the caller."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1230
1231    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1232        before = self._match_text_seq("BEFORE")
1233        self._match_text_seq("JOURNAL")
1234        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1235
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parses a Teradata `[NO|DUAL] [NOT] [LOCAL] AFTER JOURNAL` property."""
        # NOT and LOCAL are consumed here; `no`/`dual`/`local` were matched by the caller.
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1241
1242    def _parse_checksum(self) -> exp.Expression:
1243        self._match_text_seq("CHECKSUM")
1244        self._match(TokenType.EQ)
1245
1246        on = None
1247        if self._match(TokenType.ON):
1248            on = True
1249        elif self._match_text_seq("OFF"):
1250            on = False
1251        default = self._match(TokenType.DEFAULT)
1252
1253        return self.expression(
1254            exp.ChecksumProperty,
1255            on=on,
1256            default=default,
1257        )
1258
1259    def _parse_freespace(self) -> exp.Expression:
1260        self._match_text_seq("FREESPACE")
1261        self._match(TokenType.EQ)
1262        return self.expression(
1263            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1264        )
1265
1266    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1267        self._match_text_seq("MERGEBLOCKRATIO")
1268        if self._match(TokenType.EQ):
1269            return self.expression(
1270                exp.MergeBlockRatioProperty,
1271                this=self._parse_number(),
1272                percent=self._match(TokenType.PERCENT),
1273            )
1274        else:
1275            return self.expression(
1276                exp.MergeBlockRatioProperty,
1277                no=no,
1278                default=default,
1279            )
1280
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """
        Parses a Teradata DATABLOCKSIZE property.

        Args:
            default: True when the caller already consumed a DEFAULT modifier.
        """
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            # min=False encodes the MAXIMUM variant
            return self.expression(exp.DataBlocksizeProperty, min=False)

        # Explicit size form: DATABLOCKSIZE = <number> [BYTES|KBYTES|KILOBYTES]
        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1299
1300    def _parse_blockcompression(self) -> exp.Expression:
1301        self._match_text_seq("BLOCKCOMPRESSION")
1302        self._match(TokenType.EQ)
1303        always = self._match_text_seq("ALWAYS")
1304        manual = self._match_text_seq("MANUAL")
1305        never = self._match_text_seq("NEVER")
1306        default = self._match_text_seq("DEFAULT")
1307        autotemp = None
1308        if self._match_text_seq("AUTOTEMP"):
1309            autotemp = self._parse_schema()
1310
1311        return self.expression(
1312            exp.BlockCompressionProperty,
1313            always=always,
1314            manual=manual,
1315            never=never,
1316            default=default,
1317            autotemp=autotemp,
1318        )
1319
    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parses a Teradata `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # At most one FOR clause will match; attempts must stay in this order
        # because each one consumes tokens on success.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1336
    def _parse_locking(self) -> exp.Expression:
        """Parses a Teradata LOCKING modifier: kind, target, FOR/IN, lock type, OVERRIDE."""
        # Object kind being locked; each match consumes the keyword on success.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target; the others do.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1386
1387    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1388        if self._match(TokenType.PARTITION_BY):
1389            return self._parse_csv(self._parse_conjunction)
1390        return []
1391
1392    def _parse_partitioned_by(self) -> exp.Expression:
1393        self._match(TokenType.EQ)
1394        return self.expression(
1395            exp.PartitionedByProperty,
1396            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1397        )
1398
1399    def _parse_distkey(self) -> exp.Expression:
1400        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1401
1402    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1403        table = self._parse_table(schema=True)
1404        options = []
1405        while self._match_texts(("INCLUDING", "EXCLUDING")):
1406            this = self._prev.text.upper()
1407            id_var = self._parse_id_var()
1408
1409            if not id_var:
1410                return None
1411
1412            options.append(
1413                self.expression(
1414                    exp.Property,
1415                    this=this,
1416                    value=exp.Var(this=id_var.this.upper()),
1417                )
1418            )
1419        return self.expression(exp.LikeProperty, this=table, expressions=options)
1420
1421    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1422        return self.expression(
1423            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1424        )
1425
1426    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1427        self._match(TokenType.EQ)
1428        return self.expression(
1429            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1430        )
1431
    def _parse_returns(self) -> exp.Expression:
        """Parses a function definition's RETURNS clause (scalar type or TABLE schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracket struct syntax
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...)
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1451
1452    def _parse_describe(self) -> exp.Expression:
1453        kind = self._match_set(self.CREATABLES) and self._prev.text
1454        this = self._parse_table()
1455
1456        return self.expression(exp.Describe, this=this, kind=kind)
1457
    def _parse_insert(self) -> exp.Expression:
        """Parses an INSERT statement, including the Hive `INSERT ... DIRECTORY` variant."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        alternative = None
        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE/IGNORE/... (alternatives from INSERT_ALTERNATIVES)
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
            alternative=alternative,
        )
1489
1490    def _parse_row(self) -> t.Optional[exp.Expression]:
1491        if not self._match(TokenType.FORMAT):
1492            return None
1493        return self._parse_row_format()
1494
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """
        Parses a Hive row-format clause (SERDE or DELIMITED).

        Args:
            match_row: when True, a leading `ROW FORMAT` pair is required.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each optional DELIMITED sub-clause contributes one keyword argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1520
    def _parse_load_data(self) -> exp.Expression:
        """Parses a Hive `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...` statement."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # Keyword arguments are evaluated left to right, so the table, partition
        # and format clauses are consumed in statement order.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1538
1539    def _parse_delete(self) -> exp.Expression:
1540        self._match(TokenType.FROM)
1541
1542        return self.expression(
1543            exp.Delete,
1544            this=self._parse_table(schema=True),
1545            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1546            where=self._parse_where(),
1547        )
1548
1549    def _parse_update(self) -> exp.Expression:
1550        return self.expression(
1551            exp.Update,
1552            **{  # type: ignore
1553                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1554                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1555                "from": self._parse_from(),
1556                "where": self._parse_where(),
1557            },
1558        )
1559
1560    def _parse_uncache(self) -> exp.Expression:
1561        if not self._match(TokenType.TABLE):
1562            self.raise_error("Expecting TABLE after UNCACHE")
1563
1564        return self.expression(
1565            exp.Uncache,
1566            exists=self._parse_exists(),
1567            this=self._parse_table(schema=True),
1568        )
1569
1570    def _parse_cache(self) -> exp.Expression:
1571        lazy = self._match(TokenType.LAZY)
1572        self._match(TokenType.TABLE)
1573        table = self._parse_table(schema=True)
1574        options = []
1575
1576        if self._match(TokenType.OPTIONS):
1577            self._match_l_paren()
1578            k = self._parse_string()
1579            self._match(TokenType.EQ)
1580            v = self._parse_string()
1581            options = [k, v]
1582            self._match_r_paren()
1583
1584        self._match(TokenType.ALIAS)
1585        return self.expression(
1586            exp.Cache,
1587            this=table,
1588            lazy=lazy,
1589            options=options,
1590            expression=self._parse_select(nested=True),
1591        )
1592
1593    def _parse_partition(self) -> t.Optional[exp.Expression]:
1594        if not self._match(TokenType.PARTITION):
1595            return None
1596
1597        return self.expression(
1598            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1599        )
1600
1601    def _parse_value(self) -> exp.Expression:
1602        if self._match(TokenType.L_PAREN):
1603            expressions = self._parse_csv(self._parse_conjunction)
1604            self._match_r_paren()
1605            return self.expression(exp.Tuple, expressions=expressions)
1606
1607        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1608        # Source: https://prestodb.io/docs/current/sql/values.html
1609        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1610
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a WITH-prefixed statement, a parenthesized
        nested select/table, or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select, e.g. `(SELECT 1)`.
            table: allow a bare table reference inside the parens.
            parse_subquery_alias: whether to consume an alias after a subquery.

        Returns:
            The parsed expression, with trailing set operations attached,
            or None when nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP (T-SQL style) appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1684
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause containing one or more CTEs.

        Args:
            skip_with_token: set when the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop unless a comma or (leniently) another WITH follows. Note the
            # short-circuit: when COMMA matched, WITH was not tried in the
            # condition, so the else branch consumes an optional `, WITH cte`.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)
1701
1702    def _parse_cte(self) -> exp.Expression:
1703        alias = self._parse_table_alias()
1704        if not alias or not alias.this:
1705            self.raise_error("Expected CTE to have alias")
1706
1707        self._match(TokenType.ALIAS)
1708
1709        return self.expression(
1710            exp.CTE,
1711            this=self._parse_wrapped(self._parse_statement),
1712            alias=alias,
1713        )
1714
1715    def _parse_table_alias(
1716        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1717    ) -> t.Optional[exp.Expression]:
1718        any_token = self._match(TokenType.ALIAS)
1719        alias = self._parse_id_var(
1720            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
1721        )
1722        index = self._index
1723
1724        if self._match(TokenType.L_PAREN):
1725            columns = self._parse_csv(self._parse_function_parameter)
1726            self._match_r_paren() if columns else self._retreat(index)
1727        else:
1728            columns = None
1729
1730        if not alias and not columns:
1731            return None
1732
1733        return self.expression(exp.TableAlias, this=alias, columns=columns)
1734
1735    def _parse_subquery(
1736        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1737    ) -> exp.Expression:
1738        return self.expression(
1739            exp.Subquery,
1740            this=this,
1741            pivots=self._parse_pivots(),
1742            alias=self._parse_table_alias() if parse_alias else None,
1743        )
1744
    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach trailing modifiers (laterals, joins, comma-joins, and the
        WHERE/GROUP/ORDER/... clauses) to `this`, mutating it in place.

        Does nothing unless `this` is one of the modifiable node types.
        """
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Comma-joins do not apply to a bare table reference.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # `FROM a, b`: append the extra table to the FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1769
1770    def _parse_hint(self) -> t.Optional[exp.Expression]:
1771        if self._match(TokenType.HINT):
1772            hints = self._parse_csv(self._parse_function)
1773            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1774                self.raise_error("Expected */ after HINT")
1775            return self.expression(exp.Hint, expressions=hints)
1776
1777        return None
1778
1779    def _parse_into(self) -> t.Optional[exp.Expression]:
1780        if not self._match(TokenType.INTO):
1781            return None
1782
1783        temp = self._match(TokenType.TEMPORARY)
1784        unlogged = self._match(TokenType.UNLOGGED)
1785        self._match(TokenType.TABLE)
1786
1787        return self.expression(
1788            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1789        )
1790
1791    def _parse_from(self) -> t.Optional[exp.Expression]:
1792        if not self._match(TokenType.FROM):
1793            return None
1794
1795        return self.expression(
1796            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
1797        )
1798
1799    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
1800        if not self._match(TokenType.MATCH_RECOGNIZE):
1801            return None
1802        self._match_l_paren()
1803
1804        partition = self._parse_partition_by()
1805        order = self._parse_order()
1806        measures = (
1807            self._parse_alias(self._parse_conjunction())
1808            if self._match_text_seq("MEASURES")
1809            else None
1810        )
1811
1812        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
1813            rows = exp.Var(this="ONE ROW PER MATCH")
1814        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
1815            text = "ALL ROWS PER MATCH"
1816            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
1817                text += f" SHOW EMPTY MATCHES"
1818            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
1819                text += f" OMIT EMPTY MATCHES"
1820            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
1821                text += f" WITH UNMATCHED ROWS"
1822            rows = exp.Var(this=text)
1823        else:
1824            rows = None
1825
1826        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
1827            text = "AFTER MATCH SKIP"
1828            if self._match_text_seq("PAST", "LAST", "ROW"):
1829                text += f" PAST LAST ROW"
1830            elif self._match_text_seq("TO", "NEXT", "ROW"):
1831                text += f" TO NEXT ROW"
1832            elif self._match_text_seq("TO", "FIRST"):
1833                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
1834            elif self._match_text_seq("TO", "LAST"):
1835                text += f" TO LAST {self._advance_any().text}"  # type: ignore
1836            after = exp.Var(this=text)
1837        else:
1838            after = None
1839
1840        if self._match_text_seq("PATTERN"):
1841            self._match_l_paren()
1842
1843            if not self._curr:
1844                self.raise_error("Expecting )", self._curr)
1845
1846            paren = 1
1847            start = self._curr
1848
1849            while self._curr and paren > 0:
1850                if self._curr.token_type == TokenType.L_PAREN:
1851                    paren += 1
1852                if self._curr.token_type == TokenType.R_PAREN:
1853                    paren -= 1
1854                end = self._prev
1855                self._advance()
1856            if paren > 0:
1857                self.raise_error("Expecting )", self._curr)
1858            pattern = exp.Var(this=self._find_sql(start, end))
1859        else:
1860            pattern = None
1861
1862        define = (
1863            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
1864        )
1865        self._match_r_paren()
1866
1867        return self.expression(
1868            exp.MatchRecognize,
1869            partition_by=partition,
1870            order=order,
1871            measures=measures,
1872            rows=rows,
1873            after=after,
1874            pattern=pattern,
1875            define=define,
1876        )
1877
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs.

        APPLY forms are returned wrapped in an exp.Join (OUTER APPLY becomes a
        LEFT join); plain LATERAL returns an exp.Lateral. Returns None when
        none of the keywords match.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function/identifier chain.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW (Hive-style): alias is `table [AS] col1, col2, ...`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression
1922
1923    def _parse_join_side_and_kind(
1924        self,
1925    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
1926        return (
1927            self._match(TokenType.NATURAL) and self._prev,
1928            self._match_set(self.JOIN_SIDES) and self._prev,
1929            self._match_set(self.JOIN_KINDS) and self._prev,
1930        )
1931
1932    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
1933        natural, side, kind = self._parse_join_side_and_kind()
1934
1935        if not skip_join_token and not self._match(TokenType.JOIN):
1936            return None
1937
1938        kwargs: t.Dict[
1939            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
1940        ] = {"this": self._parse_table()}
1941
1942        if natural:
1943            kwargs["natural"] = True
1944        if side:
1945            kwargs["side"] = side.text
1946        if kind:
1947            kwargs["kind"] = kind.text
1948
1949        if self._match(TokenType.ON):
1950            kwargs["on"] = self._parse_conjunction()
1951        elif self._match(TokenType.USING):
1952            kwargs["using"] = self._parse_wrapped_id_vars()
1953
1954        return self.expression(exp.Join, **kwargs)  # type: ignore
1955
1956    def _parse_index(self) -> exp.Expression:
1957        index = self._parse_id_var()
1958        self._match(TokenType.ON)
1959        self._match(TokenType.TABLE)  # hive
1960
1961        return self.expression(
1962            exp.Index,
1963            this=index,
1964            table=self.expression(exp.Table, this=self._parse_id_var()),
1965            columns=self._parse_expression(),
1966        )
1967
1968    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
1969        unique = self._match(TokenType.UNIQUE)
1970        primary = self._match_text_seq("PRIMARY")
1971        amp = self._match_text_seq("AMP")
1972        if not self._match(TokenType.INDEX):
1973            return None
1974        index = self._parse_id_var()
1975        columns = None
1976        if self._match(TokenType.L_PAREN, advance=False):
1977            columns = self._parse_wrapped_csv(self._parse_column)
1978        return self.expression(
1979            exp.Index,
1980            this=index,
1981            columns=columns,
1982            unique=unique,
1983            primary=primary,
1984            amp=amp,
1985        )
1986
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly dotted table name (catalog.db.table) into exp.Table.

        Args:
            schema: when True, skip trying to parse the first part as a function.
        """
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: what was parsed so far becomes db/catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2007
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a FROM-position table factor.

        Tries, in order: LATERAL/APPLY, UNNEST, VALUES, a parenthesized
        subquery, then a plain (possibly dotted) table name with optional
        alias, WITH hints, and TABLESAMPLE.

        Args:
            schema: parse the table as a schema definition.
            alias_tokens: token types permitted as the alias identifier.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # T-SQL style table hints: WITH (NOLOCK, ...).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this
2059
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] expr]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Column-only dialects: the single alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2086
2087    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2088        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2089        if not is_derived and not self._match(TokenType.VALUES):
2090            return None
2091
2092        expressions = self._parse_csv(self._parse_value)
2093
2094        if is_derived:
2095            self._match_r_paren()
2096
2097        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2098
2099    def _parse_table_sample(self) -> t.Optional[exp.Expression]:
2100        if not self._match(TokenType.TABLE_SAMPLE):
2101            return None
2102
2103        method = self._parse_var()
2104        bucket_numerator = None
2105        bucket_denominator = None
2106        bucket_field = None
2107        percent = None
2108        rows = None
2109        size = None
2110        seed = None
2111
2112        self._match_l_paren()
2113
2114        if self._match(TokenType.BUCKET):
2115            bucket_numerator = self._parse_number()
2116            self._match(TokenType.OUT_OF)
2117            bucket_denominator = bucket_denominator = self._parse_number()
2118            self._match(TokenType.ON)
2119            bucket_field = self._parse_field()
2120        else:
2121            num = self._parse_number()
2122
2123            if self._match(TokenType.PERCENT):
2124                percent = num
2125            elif self._match(TokenType.ROWS):
2126                rows = num
2127            else:
2128                size = num
2129
2130        self._match_r_paren()
2131
2132        if self._match(TokenType.SEED):
2133            seed = self._parse_wrapped(self._parse_number)
2134
2135        return self.expression(
2136            exp.TableSample,
2137            method=method,
2138            bucket_numerator=bucket_numerator,
2139            bucket_denominator=bucket_denominator,
2140            bucket_field=bucket_field,
2141            percent=percent,
2142            rows=rows,
2143            size=size,
2144            seed=seed,
2145        )
2146
2147    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2148        return list(iter(self._parse_pivot, None))
2149
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT/UNPIVOT (...) clause into exp.Pivot.

        Backtracks and returns None when PIVOT/UNPIVOT was matched but no
        "(" follows (i.e. the word was an ordinary identifier).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause - rewind to before PIVOT/UNPIVOT.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2185
2186    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2187        if not skip_where_token and not self._match(TokenType.WHERE):
2188            return None
2189
2190        return self.expression(
2191            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2192        )
2193
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS / ROLLUP / CUBE.

        Args:
            skip_group_by_token: set when GROUP BY was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep looping so mixed forms like `GROUP BY a, ROLLUP(b), CUBE(c)`
        # accumulate into a single exp.Group node.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # `WITH ROLLUP` / `WITH CUBE` take no parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2225
2226    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2227        if not self._match(TokenType.GROUPING_SETS):
2228            return None
2229
2230        return self._parse_wrapped_csv(self._parse_grouping_set)
2231
2232    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2233        if self._match(TokenType.L_PAREN):
2234            grouping_set = self._parse_csv(self._parse_column)
2235            self._match_r_paren()
2236            return self.expression(exp.Tuple, expressions=grouping_set)
2237
2238        return self._parse_column()
2239
2240    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2241        if not skip_having_token and not self._match(TokenType.HAVING):
2242            return None
2243        return self.expression(exp.Having, this=self._parse_conjunction())
2244
2245    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2246        if not self._match(TokenType.QUALIFY):
2247            return None
2248        return self.expression(exp.Qualify, this=self._parse_conjunction())
2249
2250    def _parse_order(
2251        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2252    ) -> t.Optional[exp.Expression]:
2253        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2254            return this
2255
2256        return self.expression(
2257            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2258        )
2259
2260    def _parse_sort(
2261        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2262    ) -> t.Optional[exp.Expression]:
2263        if not self._match(token_type):
2264            return None
2265        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2266
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item with ASC/DESC and NULLS FIRST/LAST flags.

        When null ordering isn't explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so round-tripping stays faithful.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Implicit default: small-nulls dialects put NULLs first on ASC;
        # large-nulls dialects put them first on DESC.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2288
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT n, TOP n (when `top=True`), or FETCH FIRST/NEXT n ROWS.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may wrap its count in parens: TOP (n).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this
2312
2313    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2314        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2315            return this
2316
2317        count = self._parse_number()
2318        self._match_set((TokenType.ROW, TokenType.ROWS))
2319        return self.expression(exp.Offset, this=this, expression=count)
2320
2321    def _parse_lock(self) -> t.Optional[exp.Expression]:
2322        if self._match_text_seq("FOR", "UPDATE"):
2323            return self.expression(exp.Lock, update=True)
2324        if self._match_text_seq("FOR", "SHARE"):
2325            return self.expression(exp.Lock, update=False)
2326
2327        return None
2328
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION / EXCEPT / INTERSECT operations onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # distinct=True unless an explicit ALL follows; both _match calls
            # consume their tokens, and the `or` short-circuits when DISTINCT
            # is present so ALL is only tried otherwise.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
2348
2349    def _parse_expression(self) -> t.Optional[exp.Expression]:
2350        return self._parse_alias(self._parse_conjunction())
2351
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse a chain of CONJUNCTION operators (AND/OR) over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2354
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse a chain of EQUALITY operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2357
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse a chain of COMPARISON operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2360
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: the RANGE_PARSERS table (BETWEEN, IN,
        LIKE, ...), the ISNULL/NOTNULL shorthands, and a trailing IS clause."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # A NOT consumed before the range parser negates the whole predicate.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2383
2384    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2385        negate = self._match(TokenType.NOT)
2386        if self._match(TokenType.DISTINCT_FROM):
2387            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2388            return self.expression(klass, this=this, expression=self._parse_expression())
2389
2390        this = self.expression(
2391            exp.Is,
2392            this=this,
2393            expression=self._parse_null() or self._parse_boolean(),
2394        )
2395        return self.expression(exp.Not, this=this) if negate else this
2396
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right side of an IN predicate: an UNNEST call, a
        parenthesized list/subquery, or (fallback) a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A lone subqueryable becomes `query=`, otherwise a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2414
2415    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2416        low = self._parse_bitwise()
2417        self._match(TokenType.AND)
2418        high = self._parse_bitwise()
2419        return self.expression(exp.Between, this=this, low=low, high=high)
2420
2421    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2422        if not self._match(TokenType.ESCAPE):
2423            return this
2424        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2425
2426    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2427        this = self._parse_term()
2428
2429        while True:
2430            if self._match_set(self.BITWISE):
2431                this = self.expression(
2432                    self.BITWISE[self._prev.token_type],
2433                    this=this,
2434                    expression=self._parse_term(),
2435                )
2436            elif self._match_pair(TokenType.LT, TokenType.LT):
2437                this = self.expression(
2438                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2439                )
2440            elif self._match_pair(TokenType.GT, TokenType.GT):
2441                this = self.expression(
2442                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2443                )
2444            else:
2445                break
2446
2447        return this
2448
    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Binary operators at term precedence (see `self.TERM`) over factors.
        return self._parse_tokens(self._parse_factor, self.TERM)
2451
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Binary operators at factor precedence (see `self.FACTOR`) over unaries.
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2454
2455    def _parse_unary(self) -> t.Optional[exp.Expression]:
2456        if self._match_set(self.UNARY_PARSERS):
2457            return self.UNARY_PARSERS[self._prev.token_type](self)
2458        return self._parse_at_time_zone(self._parse_type())
2459
2460    def _parse_type(self) -> t.Optional[exp.Expression]:
2461        if self._match(TokenType.INTERVAL):
2462            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())
2463
2464        index = self._index
2465        type_token = self._parse_types(check_func=True)
2466        this = self._parse_column()
2467
2468        if type_token:
2469            if this and not isinstance(this, exp.Star):
2470                return self.expression(exp.Cast, this=this, to=type_token)
2471            if not type_token.args.get("expressions"):
2472                self._retreat(index)
2473                return self._parse_column()
2474            return type_token
2475
2476        return this
2477
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, returning a DataType (or Interval/PseudoType) node.

        Handles nested types (STRUCT<...>, ARRAY<...>), bracket array suffixes
        (INT[]), timestamp/time-zone modifiers, and Teradata's SYSUDTLIB.
        prefix. Returns None (after rewinding) when the upcoming tokens do not
        form a type. When `check_func` is True, a parenthesized type such as
        DATE(...) is only kept as a type when a string literal follows;
        otherwise it is assumed to be a function call and the parser rewinds.
        """
        index = self._index

        # Teradata allows types qualified by the SYSUDTLIB database.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        # True when what we parsed could also be a function call, e.g. DATE(...).
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                # Plain type parameters, e.g. DECIMAL(10, 2).
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens - not a type; rewind so the caller can retry.
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # `[]` suffixes build (possibly multi-dimensional) array types: INT[][].
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A `[` without an immediately following `]` means this is not a type
        # (e.g. a bracket subscript) - rewind.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types: ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT [LOCAL] TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone modifier rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            # Only keep `TYPE(...)` as a type when a string literal follows;
            # otherwise assume it was a function call and rewind for the
            # caller to reparse it as such.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2586
2587    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
2588        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
2589            return self._parse_types()
2590
2591        this = self._parse_id_var()
2592        self._match(TokenType.COLON)
2593        data_type = self._parse_types()
2594
2595        if not data_type:
2596            return None
2597        return self.expression(exp.StructKwarg, this=this, expression=data_type)
2598
2599    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2600        if not self._match(TokenType.AT_TIME_ZONE):
2601            return this
2602        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2603
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including column
        operators such as `::` casts, plus any bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Operator with a registered handler: the next token is
                # consumed as a literal (number or string) right-hand side.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # No handler registered (plain dot): parse the next path segment.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot: the previous column name becomes
                # the table, the table becomes the db, the db the catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2652
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.5`-style number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into one Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers written without a leading zero, e.g. `.25`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                # A parenthesized query may be followed by UNION etc.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Several comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            # Preserve comments that appeared right after the opening paren.
            if this and comments:
                this.comments = comments

            return this

        return None
2699
    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        # A field is a literal, a function call, or an identifier; `any_token`
        # is forwarded so that almost any token can act as an identifier.
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
2702
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to specialized parsers where
        registered; unrecognized names become exp.Anonymous calls. Returns
        None if the upcoming tokens cannot start a function.

        Args:
            functions: optional name -> builder overrides; defaults to
                `self.FUNCTIONS`.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed without parentheses (registered per dialect).
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No `(` follows: only registered niladic functions qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS (SELECT ...) or ANY (WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name - keep it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A window specification (OVER ...) may follow any function call.
        return self._parse_window(this)
2764
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter is parsed like a column definition: name, type, constraints.
        return self._parse_column_def(self._parse_id_var())
2767
2768    def _parse_user_defined_function(
2769        self, kind: t.Optional[TokenType] = None
2770    ) -> t.Optional[exp.Expression]:
2771        this = self._parse_id_var()
2772
2773        while self._match(TokenType.DOT):
2774            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
2775
2776        if not self._match(TokenType.L_PAREN):
2777            return this
2778
2779        expressions = self._parse_csv(self._parse_function_parameter)
2780        self._match_r_paren()
2781        return self.expression(
2782            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
2783        )
2784
2785    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
2786        literal = self._parse_primary()
2787        if literal:
2788            return self.expression(exp.Introducer, this=token.text, expression=literal)
2789
2790        return self.expression(exp.Identifier, this=token.text)
2791
2792    def _parse_national(self, token: Token) -> exp.Expression:
2793        return self.expression(exp.National, this=exp.Literal.string(token.text))
2794
2795    def _parse_session_parameter(self) -> exp.Expression:
2796        kind = None
2797        this = self._parse_id_var() or self._parse_primary()
2798
2799        if this and self._match(TokenType.DOT):
2800            kind = this.name
2801            this = self._parse_var() or self._parse_primary()
2802
2803        return self.expression(exp.SessionParameter, this=this, kind=kind)
2804
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda such as `x -> x + 1` or `(x, y) -> ...`; when no
        lambda operator follows, rewind and parse an ordinary (possibly
        DISTINCT, ordered, limited) argument expression instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all - rewind and reparse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)  # the default; consumed silently

        # Aggregate arguments may carry ORDER BY / LIMIT (e.g. GROUP_CONCAT).
        return self._parse_limit(self._parse_order(this))
2836
2837    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2838        index = self._index
2839        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
2840            self._retreat(index)
2841            return this
2842
2843        args = self._parse_csv(
2844            lambda: self._parse_constraint()
2845            or self._parse_column_def(self._parse_field(any_token=True))
2846        )
2847        self._match_r_paren()
2848        return self.expression(exp.Schema, this=this, expressions=args)
2849
2850    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2851        kind = self._parse_types()
2852
2853        if self._match_text_seq("FOR", "ORDINALITY"):
2854            return self.expression(exp.ColumnDef, this=this, ordinality=True)
2855
2856        constraints = []
2857        while True:
2858            constraint = self._parse_column_constraint()
2859            if not constraint:
2860                break
2861            constraints.append(constraint)
2862
2863        if not kind and not constraints:
2864            return this
2865
2866        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
2867
2868    def _parse_auto_increment(self) -> exp.Expression:
2869        start = None
2870        increment = None
2871
2872        if self._match(TokenType.L_PAREN, advance=False):
2873            args = self._parse_wrapped_csv(self._parse_bitwise)
2874            start = seq_get(args, 0)
2875            increment = seq_get(args, 1)
2876        elif self._match_text_seq("START"):
2877            start = self._parse_bitwise()
2878            self._match_text_seq("INCREMENT")
2879            increment = self._parse_bitwise()
2880
2881        if start and increment:
2882            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
2883
2884        return exp.AutoIncrementColumnConstraint()
2885
2886    def _parse_generated_as_identity(self) -> exp.Expression:
2887        if self._match(TokenType.BY_DEFAULT):
2888            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
2889        else:
2890            self._match_text_seq("ALWAYS")
2891            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
2892
2893        self._match_text_seq("AS", "IDENTITY")
2894        if self._match(TokenType.L_PAREN):
2895            if self._match_text_seq("START", "WITH"):
2896                this.set("start", self._parse_bitwise())
2897            if self._match_text_seq("INCREMENT", "BY"):
2898                this.set("increment", self._parse_bitwise())
2899            if self._match_text_seq("MINVALUE"):
2900                this.set("minvalue", self._parse_bitwise())
2901            if self._match_text_seq("MAXVALUE"):
2902                this.set("maxvalue", self._parse_bitwise())
2903
2904            if self._match_text_seq("CYCLE"):
2905                this.set("cycle", True)
2906            elif self._match_text_seq("NO", "CYCLE"):
2907                this.set("cycle", False)
2908
2909            self._match_r_paren()
2910
2911        return this
2912
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Called after NOT in a column definition: NOT NULL or NOT CASESPECIFIC.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None
2919
2920    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
2921        this = self._parse_references()
2922        if this:
2923            return this
2924
2925        if self._match(TokenType.CONSTRAINT):
2926            this = self._parse_id_var()
2927
2928        if self._match_texts(self.CONSTRAINT_PARSERS):
2929            return self.expression(
2930                exp.ColumnConstraint,
2931                this=this,
2932                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
2933            )
2934
2935        return this
2936
2937    def _parse_constraint(self) -> t.Optional[exp.Expression]:
2938        if not self._match(TokenType.CONSTRAINT):
2939            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
2940
2941        this = self._parse_id_var()
2942        expressions = []
2943
2944        while True:
2945            constraint = self._parse_unnamed_constraint() or self._parse_function()
2946            if not constraint:
2947                break
2948            expressions.append(constraint)
2949
2950        return self.expression(exp.Constraint, this=this, expressions=expressions)
2951
2952    def _parse_unnamed_constraint(
2953        self, constraints: t.Optional[t.Collection[str]] = None
2954    ) -> t.Optional[exp.Expression]:
2955        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
2956            return None
2957
2958        constraint = self._prev.text.upper()
2959        if constraint not in self.CONSTRAINT_PARSERS:
2960            self.raise_error(f"No parser found for schema constraint {constraint}.")
2961
2962        return self.CONSTRAINT_PARSERS[constraint](self)
2963
2964    def _parse_unique(self) -> exp.Expression:
2965        if not self._match(TokenType.L_PAREN, advance=False):
2966            return self.expression(exp.UniqueColumnConstraint)
2967        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
2968
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings, e.g.
        `ON DELETE CASCADE`, `NOT ENFORCED`, `DEFERRABLE`."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option - stop collecting.
                break

        return options
3005
3006    def _parse_references(self) -> t.Optional[exp.Expression]:
3007        if not self._match(TokenType.REFERENCES):
3008            return None
3009
3010        expressions = None
3011        this = self._parse_id_var()
3012
3013        if self._match(TokenType.L_PAREN, advance=False):
3014            expressions = self._parse_wrapped_id_vars()
3015
3016        options = self._parse_key_constraint_options()
3017        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3018
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause, and
        any ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}  # maps "delete"/"update" to the action string

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # NOTE(review): if neither NULL nor DEFAULT follows SET, _prev is
                # still the SET token, yielding "SET SET" - confirm intended.
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3044
3045    def _parse_primary_key(self) -> exp.Expression:
3046        desc = (
3047            self._match_set((TokenType.ASC, TokenType.DESC))
3048            and self._prev.token_type == TokenType.DESC
3049        )
3050
3051        if not self._match(TokenType.L_PAREN, advance=False):
3052            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3053
3054        expressions = self._parse_wrapped_id_vars()
3055        options = self._parse_key_constraint_options()
3056        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3057
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed/braced suffixes: subscripts (x[1]), slices, array
        literals (ARRAY[...]), and brace struct literals; applied recursively
        to support chained subscripts."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no lower bound, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript - normalize indices by the dialect's array offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to consume chained subscripts, e.g. x[1][2].
        return self._parse_bracket(this)
3086
3087    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3088        if self._match(TokenType.COLON):
3089            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3090        return this
3091
3092    def _parse_case(self) -> t.Optional[exp.Expression]:
3093        ifs = []
3094        default = None
3095
3096        expression = self._parse_conjunction()
3097
3098        while self._match(TokenType.WHEN):
3099            this = self._parse_conjunction()
3100            self._match(TokenType.THEN)
3101            then = self._parse_conjunction()
3102            ifs.append(self.expression(exp.If, this=this, true=then))
3103
3104        if self._match(TokenType.ELSE):
3105            default = self._parse_conjunction()
3106
3107        if not self._match(TokenType.END):
3108            self.raise_error("Expected END after CASE", self._prev)
3109
3110        return self._parse_window(
3111            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3112        )
3113
3114    def _parse_if(self) -> t.Optional[exp.Expression]:
3115        if self._match(TokenType.L_PAREN):
3116            args = self._parse_csv(self._parse_conjunction)
3117            this = exp.If.from_arg_list(args)
3118            self.validate_expression(this, args)
3119            self._match_r_paren()
3120        else:
3121            condition = self._parse_conjunction()
3122            self._match(TokenType.THEN)
3123            true = self._parse_conjunction()
3124            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3125            self._match(TokenType.END)
3126            this = self.expression(exp.If, this=condition, true=true, false=false)
3127
3128        return self._parse_window(this)
3129
3130    def _parse_extract(self) -> exp.Expression:
3131        this = self._parse_function() or self._parse_var() or self._parse_type()
3132
3133        if self._match(TokenType.FROM):
3134            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3135
3136        if not self._match(TokenType.COMMA):
3137            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3138
3139        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3140
3141    def _parse_cast(self, strict: bool) -> exp.Expression:
3142        this = self._parse_conjunction()
3143
3144        if not self._match(TokenType.ALIAS):
3145            self.raise_error("Expected AS after CAST")
3146
3147        to = self._parse_types()
3148
3149        if not to:
3150            self.raise_error("Expected TYPE after CAST")
3151        elif to.this == exp.DataType.Type.CHAR:
3152            if self._match(TokenType.CHARACTER_SET):
3153                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3154
3155        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3156
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into a GroupConcat node,
        handling both the Postgres in-paren ORDER BY form and the
        WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3185
3186    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3187        to: t.Optional[exp.Expression]
3188        this = self._parse_column()
3189
3190        if self._match(TokenType.USING):
3191            to = self.expression(exp.CharacterSet, this=self._parse_var())
3192        elif self._match(TokenType.COMMA):
3193            to = self._parse_types()
3194        else:
3195            to = None
3196
3197        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3198
3199    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3200        args = self._parse_csv(self._parse_bitwise)
3201
3202        if self._match(TokenType.IN):
3203            return self.expression(
3204                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3205            )
3206
3207        if haystack_first:
3208            haystack = seq_get(args, 0)
3209            needle = seq_get(args, 1)
3210        else:
3211            needle = seq_get(args, 0)
3212            haystack = seq_get(args, 1)
3213
3214        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3215
3216        self.validate_expression(this, args)
3217
3218        return this
3219
3220    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3221        args = self._parse_csv(self._parse_table)
3222        return exp.JoinHint(this=func_name.upper(), expressions=args)
3223
3224    def _parse_substring(self) -> exp.Expression:
3225        # Postgres supports the form: substring(string [from int] [for int])
3226        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3227
3228        args = self._parse_csv(self._parse_bitwise)
3229
3230        if self._match(TokenType.FROM):
3231            args.append(self._parse_bitwise())
3232            if self._match(TokenType.FOR):
3233                args.append(self._parse_bitwise())
3234
3235        this = exp.Substring.from_arg_list(args)
3236        self.validate_expression(this, args)
3237
3238        return this
3239
3240    def _parse_trim(self) -> exp.Expression:
3241        # https://www.w3resource.com/sql/character-functions/trim.php
3242        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3243
3244        position = None
3245        collation = None
3246
3247        if self._match_set(self.TRIM_TYPES):
3248            position = self._prev.text.upper()
3249
3250        expression = self._parse_term()
3251        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3252            this = self._parse_term()
3253        else:
3254            this = expression
3255            expression = None
3256
3257        if self._match(TokenType.COLLATE):
3258            collation = self._parse_term()
3259
3260        return self.expression(
3261            exp.Trim,
3262            this=this,
3263            position=position,
3264            expression=expression,
3265            collation=collation,
3266        )
3267
3268    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3269        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3270
3271    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3272        return self._parse_window(self._parse_id_var(), alias=True)
3273
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the suffixes that may follow a function call — FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and OVER (...) — wrapping `this` in the
        corresponding expression nodes.

        Args:
            this: the expression (typically a function call) being windowed.
            alias: when True, parse a named window definition (`name AS (...)`)
                instead of requiring the OVER keyword.

        Returns:
            The wrapped expression, or `this` unchanged if no window suffix follows.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            # No OVER: there is no window specification; return what we have.
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window rather than an inline spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        # Frame kind (ROWS or RANGE) introduces an optional frame specification.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
3347
3348    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3349        self._match(TokenType.BETWEEN)
3350
3351        return {
3352            "value": (
3353                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3354            )
3355            or self._parse_bitwise(),
3356            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3357        }
3358
3359    def _parse_alias(
3360        self, this: t.Optional[exp.Expression], explicit: bool = False
3361    ) -> t.Optional[exp.Expression]:
3362        any_token = self._match(TokenType.ALIAS)
3363
3364        if explicit and not any_token:
3365            return this
3366
3367        if self._match(TokenType.L_PAREN):
3368            aliases = self.expression(
3369                exp.Aliases,
3370                this=this,
3371                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3372            )
3373            self._match_r_paren(aliases)
3374            return aliases
3375
3376        alias = self._parse_id_var(any_token)
3377
3378        if alias:
3379            return self.expression(exp.Alias, this=this, alias=alias)
3380
3381        return this
3382
3383    def _parse_id_var(
3384        self,
3385        any_token: bool = True,
3386        tokens: t.Optional[t.Collection[TokenType]] = None,
3387        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3388    ) -> t.Optional[exp.Expression]:
3389        identifier = self._parse_identifier()
3390
3391        if identifier:
3392            return identifier
3393
3394        prefix = ""
3395
3396        if prefix_tokens:
3397            while self._match_set(prefix_tokens):
3398                prefix += self._prev.text
3399
3400        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3401            quoted = self._prev.token_type == TokenType.STRING
3402            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3403
3404        return None
3405
3406    def _parse_string(self) -> t.Optional[exp.Expression]:
3407        if self._match(TokenType.STRING):
3408            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3409        return self._parse_placeholder()
3410
3411    def _parse_number(self) -> t.Optional[exp.Expression]:
3412        if self._match(TokenType.NUMBER):
3413            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3414        return self._parse_placeholder()
3415
3416    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3417        if self._match(TokenType.IDENTIFIER):
3418            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3419        return self._parse_placeholder()
3420
3421    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
3422        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
3423            return self.expression(exp.Var, this=self._prev.text)
3424        return self._parse_placeholder()
3425
3426    def _advance_any(self) -> t.Optional[Token]:
3427        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
3428            self._advance()
3429            return self._prev
3430        return None
3431
3432    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
3433        return self._parse_var() or self._parse_string()
3434
3435    def _parse_null(self) -> t.Optional[exp.Expression]:
3436        if self._match(TokenType.NULL):
3437            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
3438        return None
3439
3440    def _parse_boolean(self) -> t.Optional[exp.Expression]:
3441        if self._match(TokenType.TRUE):
3442            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
3443        if self._match(TokenType.FALSE):
3444            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
3445        return None
3446
3447    def _parse_star(self) -> t.Optional[exp.Expression]:
3448        if self._match(TokenType.STAR):
3449            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
3450        return None
3451
3452    def _parse_parameter(self) -> exp.Expression:
3453        wrapped = self._match(TokenType.L_BRACE)
3454        this = self._parse_var() or self._parse_primary()
3455        self._match(TokenType.R_BRACE)
3456        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
3457
3458    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
3459        if self._match_set(self.PLACEHOLDER_PARSERS):
3460            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
3461            if placeholder:
3462                return placeholder
3463            self._advance(-1)
3464        return None
3465
3466    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3467        if not self._match(TokenType.EXCEPT):
3468            return None
3469        if self._match(TokenType.L_PAREN, advance=False):
3470            return self._parse_wrapped_csv(self._parse_column)
3471        return self._parse_csv(self._parse_column)
3472
3473    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3474        if not self._match(TokenType.REPLACE):
3475            return None
3476        if self._match(TokenType.L_PAREN, advance=False):
3477            return self._parse_wrapped_csv(self._parse_expression)
3478        return self._parse_csv(self._parse_expression)
3479
3480    def _parse_csv(
3481        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3482    ) -> t.List[t.Optional[exp.Expression]]:
3483        parse_result = parse_method()
3484        items = [parse_result] if parse_result is not None else []
3485
3486        while self._match(sep):
3487            if parse_result and self._prev_comments:
3488                parse_result.comments = self._prev_comments
3489
3490            parse_result = parse_method()
3491            if parse_result is not None:
3492                items.append(parse_result)
3493
3494        return items
3495
3496    def _parse_tokens(
3497        self, parse_method: t.Callable, expressions: t.Dict
3498    ) -> t.Optional[exp.Expression]:
3499        this = parse_method()
3500
3501        while self._match_set(expressions):
3502            this = self.expression(
3503                expressions[self._prev.token_type],
3504                this=this,
3505                comments=self._prev_comments,
3506                expression=parse_method(),
3507            )
3508
3509        return this
3510
3511    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
3512        return self._parse_wrapped_csv(self._parse_id_var)
3513
3514    def _parse_wrapped_csv(
3515        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
3516    ) -> t.List[t.Optional[exp.Expression]]:
3517        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
3518
3519    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
3520        self._match_l_paren()
3521        parse_result = parse_method()
3522        self._match_r_paren()
3523        return parse_result
3524
3525    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
3526        return self._parse_select() or self._parse_expression()
3527
3528    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
3529        return self._parse_set_operations(
3530            self._parse_select(nested=True, parse_subquery_alias=False)
3531        )
3532
3533    def _parse_transaction(self) -> exp.Expression:
3534        this = None
3535        if self._match_texts(self.TRANSACTION_KIND):
3536            this = self._prev.text
3537
3538        self._match_texts({"TRANSACTION", "WORK"})
3539
3540        modes = []
3541        while True:
3542            mode = []
3543            while self._match(TokenType.VAR):
3544                mode.append(self._prev.text)
3545
3546            if mode:
3547                modes.append(" ".join(mode))
3548            if not self._match(TokenType.COMMA):
3549                break
3550
3551        return self.expression(exp.Transaction, this=this, modes=modes)
3552
3553    def _parse_commit_or_rollback(self) -> exp.Expression:
3554        chain = None
3555        savepoint = None
3556        is_rollback = self._prev.token_type == TokenType.ROLLBACK
3557
3558        self._match_texts({"TRANSACTION", "WORK"})
3559
3560        if self._match_text_seq("TO"):
3561            self._match_text_seq("SAVEPOINT")
3562            savepoint = self._parse_id_var()
3563
3564        if self._match(TokenType.AND):
3565            chain = not self._match_text_seq("NO")
3566            self._match_text_seq("CHAIN")
3567
3568        if is_rollback:
3569            return self.expression(exp.Rollback, savepoint=savepoint)
3570        return self.expression(exp.Commit, chain=chain)
3571
3572    def _parse_add_column(self) -> t.Optional[exp.Expression]:
3573        if not self._match_text_seq("ADD"):
3574            return None
3575
3576        self._match(TokenType.COLUMN)
3577        exists_column = self._parse_exists(not_=True)
3578        expression = self._parse_column_def(self._parse_field(any_token=True))
3579
3580        if expression:
3581            expression.set("exists", exists_column)
3582
3583        return expression
3584
3585    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
3586        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
3587
3588    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
3589    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
3590        return self.expression(
3591            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
3592        )
3593
3594    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
3595        this = None
3596        kind = self._prev.token_type
3597
3598        if kind == TokenType.CONSTRAINT:
3599            this = self._parse_id_var()
3600
3601            if self._match_text_seq("CHECK"):
3602                expression = self._parse_wrapped(self._parse_conjunction)
3603                enforced = self._match_text_seq("ENFORCED")
3604
3605                return self.expression(
3606                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
3607                )
3608
3609        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
3610            expression = self._parse_foreign_key()
3611        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
3612            expression = self._parse_primary_key()
3613
3614        return self.expression(exp.AddConstraint, this=this, expression=expression)
3615
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement; only ALTER TABLE is handled structurally.

        Returns:
            An exp.AlterTable node with a list of parsed actions, or a raw
            exp.Command for non-TABLE ALTER statements.
        """
        # Anything other than ALTER TABLE is preserved verbatim as a command.
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        # Remember the position so ambiguous ADD / DROP forms can be re-parsed.
        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind and parse ADD COLUMN action(s) instead.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition drop: rewind and parse DROP COLUMN action(s) instead.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)
3668
3669    def _parse_show(self) -> t.Optional[exp.Expression]:
3670        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
3671        if parser:
3672            return parser(self)
3673        self._advance()
3674        return self.expression(exp.Show, this=self._prev.text.upper())
3675
3676    def _default_parse_set_item(self) -> exp.Expression:
3677        return self.expression(
3678            exp.SetItem,
3679            this=self._parse_statement(),
3680        )
3681
3682    def _parse_set_item(self) -> t.Optional[exp.Expression]:
3683        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
3684        return parser(self) if parser else self._default_parse_set_item()
3685
3686    def _parse_merge(self) -> exp.Expression:
3687        self._match(TokenType.INTO)
3688        target = self._parse_table()
3689
3690        self._match(TokenType.USING)
3691        using = self._parse_table()
3692
3693        self._match(TokenType.ON)
3694        on = self._parse_conjunction()
3695
3696        whens = []
3697        while self._match(TokenType.WHEN):
3698            this = self._parse_conjunction()
3699            self._match(TokenType.THEN)
3700
3701            if self._match(TokenType.INSERT):
3702                _this = self._parse_star()
3703                if _this:
3704                    then = self.expression(exp.Insert, this=_this)
3705                else:
3706                    then = self.expression(
3707                        exp.Insert,
3708                        this=self._parse_value(),
3709                        expression=self._match(TokenType.VALUES) and self._parse_value(),
3710                    )
3711            elif self._match(TokenType.UPDATE):
3712                expressions = self._parse_star()
3713                if expressions:
3714                    then = self.expression(exp.Update, expressions=expressions)
3715                else:
3716                    then = self.expression(
3717                        exp.Update,
3718                        expressions=self._match(TokenType.SET)
3719                        and self._parse_csv(self._parse_equality),
3720                    )
3721            elif self._match(TokenType.DELETE):
3722                then = self.expression(exp.Var, this=self._prev.text)
3723
3724            whens.append(self.expression(exp.When, this=this, then=then))
3725
3726        return self.expression(
3727            exp.Merge,
3728            this=target,
3729            using=using,
3730            on=on,
3731            expressions=whens,
3732        )
3733
3734    def _parse_set(self) -> exp.Expression:
3735        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
3736
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and return the raw SQL text (from
        `start` through the last token) wrapped in an exp.Command node."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))
3741
3742    def _find_parser(
3743        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
3744    ) -> t.Optional[t.Callable]:
3745        index = self._index
3746        this = []
3747        while True:
3748            # The current token might be multiple words
3749            curr = self._curr.text.upper()
3750            key = curr.split(" ")
3751            this.append(curr)
3752            self._advance()
3753            result, trie = in_trie(trie, key)
3754            if result == 0:
3755                break
3756            if result == 2:
3757                subparser = parsers[" ".join(this)]
3758                return subparser
3759        self._retreat(index)
3760        return None
3761
3762    def _match(self, token_type, advance=True):
3763        if not self._curr:
3764            return None
3765
3766        if self._curr.token_type == token_type:
3767            if advance:
3768                self._advance()
3769            return True
3770
3771        return None
3772
3773    def _match_set(self, types):
3774        if not self._curr:
3775            return None
3776
3777        if self._curr.token_type in types:
3778            self._advance()
3779            return True
3780
3781        return None
3782
3783    def _match_pair(self, token_type_a, token_type_b, advance=True):
3784        if not self._curr or not self._next:
3785            return None
3786
3787        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
3788            if advance:
3789                self._advance(2)
3790            return True
3791
3792        return None
3793
    def _match_l_paren(self, expression=None):
        """Require a '(' token (raising otherwise) and attach any preceding
        comments to `expression` when given."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3799
    def _match_r_paren(self, expression=None):
        """Require a ')' token (raising otherwise) and attach any preceding
        comments to `expression` when given."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments
3805
3806    def _match_texts(self, texts):
3807        if self._curr and self._curr.text.upper() in texts:
3808            self._advance()
3809            return True
3810        return False
3811
3812    def _match_text_seq(self, *texts, advance=True):
3813        index = self._index
3814        for text in texts:
3815            if self._curr and self._curr.text.upper() == text:
3816                self._advance()
3817            else:
3818                self._retreat(index)
3819                return False
3820
3821        if not advance:
3822            self._retreat(index)
3823
3824        return True
3825
3826    def _replace_columns_with_dots(self, this):
3827        if isinstance(this, exp.Dot):
3828            exp.replace_children(this, self._replace_columns_with_dots)
3829        elif isinstance(this, exp.Column):
3830            exp.replace_children(this, self._replace_columns_with_dots)
3831            table = this.args.get("table")
3832            this = (
3833                self.expression(exp.Dot, this=table, expression=this.this)
3834                if table
3835                else self.expression(exp.Var, this=this.name)
3836            )
3837        elif isinstance(this, exp.Identifier):
3838            this = self.expression(exp.Var, this=this.name)
3839        return this
3840
3841    def _replace_lambda(self, node, lambda_variables):
3842        if isinstance(node, exp.Column):
3843            if node.name in lambda_variables:
3844                return node.this
3845        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
701    def __init__(
702        self,
703        error_level: t.Optional[ErrorLevel] = None,
704        error_message_context: int = 100,
705        index_offset: int = 0,
706        unnest_column_only: bool = False,
707        alias_post_tablesample: bool = False,
708        max_errors: int = 3,
709        null_ordering: t.Optional[str] = None,
710    ):
711        self.error_level = error_level or ErrorLevel.IMMEDIATE
712        self.error_message_context = error_message_context
713        self.index_offset = index_offset
714        self.unnest_column_only = unnest_column_only
715        self.alias_post_tablesample = alias_post_tablesample
716        self.max_errors = max_errors
717        self.null_ordering = null_ordering
718        self.reset()
def reset(self):
720    def reset(self):
721        self.sql = ""
722        self.errors = []
723        self._tokens = []
724        self._index = 0
725        self._curr = None
726        self._next = None
727        self._prev = None
728        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
730    def parse(
731        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
732    ) -> t.List[t.Optional[exp.Expression]]:
733        """
734        Parses a list of tokens and returns a list of syntax trees, one tree
735        per parsed SQL statement.
736
737        Args:
738            raw_tokens: the list of tokens.
739            sql: the original SQL string, used to produce helpful debug messages.
740
741        Returns:
742            The list of syntax trees.
743        """
744        return self._parse(
745            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
746        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
748    def parse_into(
749        self,
750        expression_types: exp.IntoType,
751        raw_tokens: t.List[Token],
752        sql: t.Optional[str] = None,
753    ) -> t.List[t.Optional[exp.Expression]]:
754        """
755        Parses a list of tokens into a given Expression type. If a collection of Expression
756        types is given instead, this method will try to parse the token list into each one
757        of them, stopping at the first for which the parsing succeeds.
758
759        Args:
760            expression_types: the expression type(s) to try and parse the token list into.
761            raw_tokens: the list of tokens.
762            sql: the original SQL string, used to produce helpful debug messages.
763
764        Returns:
765            The target Expression.
766        """
767        errors = []
768        for expression_type in ensure_collection(expression_types):
769            parser = self.EXPRESSION_PARSERS.get(expression_type)
770            if not parser:
771                raise TypeError(f"No parser registered for {expression_type}")
772            try:
773                return self._parse(parser, raw_tokens, sql)
774            except ParseError as e:
775                e.errors[0]["into_expression"] = expression_type
776                errors.append(e)
777        raise ParseError(
778            f"Failed to parse into {expression_types}",
779            errors=merge_errors(errors),
780        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.
    """
    if self.error_level == ErrorLevel.WARN:
        # Warn mode: emit each recorded error through the module logger and return.
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        # Raise mode: bundle every recorded error into a single ParseError,
        # truncating the combined message at `max_errors` entries.
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the
    chosen error level setting.
    """
    # Anchor the error on the given token, falling back to the current or
    # previous token, and finally to an empty one if nothing is available.
    token = token or self._curr or self._prev or Token.string("")

    begin = self._find_token(token)
    finish = begin + len(token.text)
    context_size = self.error_message_context

    before = self.sql[max(begin - context_size, 0) : begin]
    marked = self.sql[begin:finish]
    after = self.sql[finish : finish + context_size]

    # Underline the offending span (ANSI escape codes) inside its SQL context.
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {before}\033[4m{marked}\033[0m{after}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=before,
        highlight=marked,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression(
    self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
) -> exp.Expression:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: the expression class to instantiate.
        comments: an optional list of comments to attach to the expression.
        kwargs: the arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    node = exp_class(**kwargs)

    # Attach (and consume) any comments gathered from the preceding tokens.
    if self._prev_comments:
        node.comments = self._prev_comments
        self._prev_comments = None

    # Explicitly supplied comments override the consumed ones.
    if comments:
        node.comments = comments

    self.validate_expression(node)
    return node

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns: The target expression.

def validate_expression(
    self, expression: exp.Expression, args: t.Optional[t.List] = None
) -> None:
    """
    Validates an already instantiated expression, making sure that all its mandatory
    arguments are set.

    Args:
        expression: the expression to validate.
        args: an optional list of items that was used to instantiate the expression, if it's a Func.
    """
    # In IGNORE mode, validation is a no-op.
    if self.error_level == ErrorLevel.IGNORE:
        return

    for message in expression.error_messages(args):
        self.raise_error(message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.