From ebec59cc5cb6c6856705bf82ced7fe8d9f75b0d0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 7 Mar 2023 19:09:31 +0100 Subject: Merging upstream version 11.3.0. Signed-off-by: Daniel Baumann --- docs/sqlglot/parser.html | 14698 +++++++++++++++++++++++---------------------- 1 file changed, 7390 insertions(+), 7308 deletions(-) (limited to 'docs/sqlglot/parser.html') diff --git a/docs/sqlglot/parser.html b/docs/sqlglot/parser.html index cfd4563..b2685da 100644 --- a/docs/sqlglot/parser.html +++ b/docs/sqlglot/parser.html @@ -245,3729 +245,3770 @@ 157 158 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 159 - 160 ID_VAR_TOKENS = { - 161 TokenType.VAR, - 162 TokenType.ANTI, - 163 TokenType.APPLY, - 164 TokenType.AUTO_INCREMENT, - 165 TokenType.BEGIN, - 166 TokenType.BOTH, - 167 TokenType.BUCKET, - 168 TokenType.CACHE, - 169 TokenType.CASCADE, - 170 TokenType.COLLATE, - 171 TokenType.COLUMN, - 172 TokenType.COMMAND, - 173 TokenType.COMMIT, - 174 TokenType.COMPOUND, - 175 TokenType.CONSTRAINT, - 176 TokenType.CURRENT_TIME, - 177 TokenType.DEFAULT, - 178 TokenType.DELETE, - 179 TokenType.DESCRIBE, - 180 TokenType.DIV, - 181 TokenType.END, - 182 TokenType.EXECUTE, - 183 TokenType.ESCAPE, - 184 TokenType.FALSE, - 185 TokenType.FIRST, - 186 TokenType.FILTER, - 187 TokenType.FOLLOWING, - 188 TokenType.FORMAT, - 189 TokenType.FUNCTION, - 190 TokenType.IF, - 191 TokenType.INDEX, - 192 TokenType.ISNULL, - 193 TokenType.INTERVAL, - 194 TokenType.LAZY, - 195 TokenType.LEADING, - 196 TokenType.LEFT, - 197 TokenType.LOCAL, - 198 TokenType.MATERIALIZED, - 199 TokenType.MERGE, - 200 TokenType.NATURAL, - 201 TokenType.NEXT, - 202 TokenType.OFFSET, - 203 TokenType.ONLY, - 204 TokenType.OPTIONS, - 205 TokenType.ORDINALITY, - 206 TokenType.PERCENT, - 207 TokenType.PIVOT, - 208 TokenType.PRECEDING, - 209 TokenType.RANGE, - 210 TokenType.REFERENCES, - 211 TokenType.RIGHT, - 212 TokenType.ROW, - 213 TokenType.ROWS, - 214 TokenType.SCHEMA, - 215 TokenType.SEED, - 216 TokenType.SEMI, - 217 TokenType.SET, - 218 TokenType.SHOW, - 219 TokenType.SORTKEY, - 220 TokenType.TABLE, - 221 TokenType.TEMPORARY, - 222 TokenType.TOP, - 223 TokenType.TRAILING, - 224 TokenType.TRUE, - 225 TokenType.UNBOUNDED, - 226 TokenType.UNIQUE, - 227 TokenType.UNLOGGED, - 228 TokenType.UNPIVOT, - 229 TokenType.PROCEDURE, - 230 TokenType.VIEW, - 231 TokenType.VOLATILE, - 232 TokenType.WINDOW, - 233 *SUBQUERY_PREDICATES, - 234 *TYPE_TOKENS, - 235 *NO_PAREN_FUNCTIONS, - 236 } - 237 - 238 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { - 239 TokenType.APPLY, - 240 TokenType.LEFT, - 241 TokenType.NATURAL, - 242 TokenType.OFFSET, - 243 TokenType.RIGHT, - 244 TokenType.WINDOW, - 245 } - 246 - 247 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} - 248 - 249 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} - 250 - 251 FUNC_TOKENS = { - 252 TokenType.COMMAND, - 253 TokenType.CURRENT_DATE, - 254 TokenType.CURRENT_DATETIME, - 255 TokenType.CURRENT_TIMESTAMP, - 256 TokenType.CURRENT_TIME, - 257 TokenType.FILTER, - 258 TokenType.FIRST, - 259 TokenType.FORMAT, - 260 TokenType.IDENTIFIER, - 261 TokenType.INDEX, - 262 TokenType.ISNULL, - 263 TokenType.ILIKE, - 264 TokenType.LIKE, - 265 TokenType.MERGE, - 266 TokenType.OFFSET, - 267 TokenType.PRIMARY_KEY, - 268 TokenType.REPLACE, - 269 TokenType.ROW, - 270 TokenType.UNNEST, - 271 TokenType.VAR, - 272 TokenType.LEFT, - 273 TokenType.RIGHT, - 274 TokenType.DATE, - 275 TokenType.DATETIME, - 276 TokenType.TABLE, - 277 TokenType.TIMESTAMP, - 278 TokenType.TIMESTAMPTZ, - 279 TokenType.WINDOW, - 280 *TYPE_TOKENS, - 281 *SUBQUERY_PREDICATES, - 282 } - 283 - 284 CONJUNCTION = { - 285 TokenType.AND: exp.And, - 286 TokenType.OR: exp.Or, - 287 } - 288 - 289 EQUALITY = { - 290 TokenType.EQ: exp.EQ, - 291 TokenType.NEQ: exp.NEQ, - 292 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, - 293 } - 294 - 295 COMPARISON = { - 296 TokenType.GT: exp.GT, - 297 TokenType.GTE: exp.GTE, - 298 TokenType.LT: exp.LT, - 299 TokenType.LTE: exp.LTE, - 300 } - 301 - 302 BITWISE = { - 303 TokenType.AMP: exp.BitwiseAnd, - 304 TokenType.CARET: exp.BitwiseXor, - 305 TokenType.PIPE: exp.BitwiseOr, - 306 TokenType.DPIPE: exp.DPipe, - 307 } - 308 - 309 TERM = { - 310 TokenType.DASH: exp.Sub, - 311 TokenType.PLUS: exp.Add, - 312 TokenType.MOD: exp.Mod, - 313 TokenType.COLLATE: exp.Collate, - 314 } - 315 - 316 FACTOR = { - 317 TokenType.DIV: exp.IntDiv, - 318 TokenType.LR_ARROW: exp.Distance, - 319 TokenType.SLASH: exp.Div, - 320 TokenType.STAR: exp.Mul, - 321 } - 322 - 323 TIMESTAMPS = { - 324 TokenType.TIME, - 325 TokenType.TIMESTAMP, - 326 TokenType.TIMESTAMPTZ, - 327 TokenType.TIMESTAMPLTZ, - 328 } - 329 - 330 SET_OPERATIONS = { - 331 TokenType.UNION, - 332 TokenType.INTERSECT, - 333 TokenType.EXCEPT, - 334 } - 335 - 336 JOIN_SIDES = { - 337 TokenType.LEFT, - 338 TokenType.RIGHT, - 339 TokenType.FULL, - 340 } - 341 - 342 JOIN_KINDS = { - 343 TokenType.INNER, - 344 TokenType.OUTER, - 345 TokenType.CROSS, - 346 TokenType.SEMI, - 347 TokenType.ANTI, - 348 } - 349 - 350 LAMBDAS = { - 351 TokenType.ARROW: lambda self, expressions: self.expression( - 352 exp.Lambda, - 353 this=self._parse_conjunction().transform( - 354 self._replace_lambda, {node.name for node in expressions} - 355 ), - 356 expressions=expressions, - 357 ), - 358 TokenType.FARROW: lambda self, expressions: self.expression( - 359 exp.Kwarg, - 360 this=exp.Var(this=expressions[0].name), - 361 expression=self._parse_conjunction(), - 362 ), - 363 } - 364 - 365 COLUMN_OPERATORS = { - 366 TokenType.DOT: None, - 367 TokenType.DCOLON: lambda self, this, to: self.expression( - 368 exp.Cast, - 369 this=this, - 370 to=to, - 371 ), - 372 TokenType.ARROW: lambda self, this, path: self.expression( - 373 exp.JSONExtract, - 374 this=this, - 375 expression=path, - 376 ), - 377 TokenType.DARROW: lambda self, this, path: self.expression( - 378 exp.JSONExtractScalar, + 160 DB_CREATABLES = { + 161 TokenType.DATABASE, + 162 TokenType.SCHEMA, + 163 TokenType.TABLE, + 164 TokenType.VIEW, + 165 } + 166 + 167 CREATABLES = { + 168 TokenType.COLUMN, + 169 TokenType.FUNCTION, + 170 TokenType.INDEX, + 171 TokenType.PROCEDURE, + 172 *DB_CREATABLES, + 173 } + 174 + 175 ID_VAR_TOKENS = { + 176 TokenType.VAR, + 177 TokenType.ANTI, + 178 TokenType.APPLY, + 179 TokenType.AUTO_INCREMENT, + 180 TokenType.BEGIN, + 181 TokenType.BOTH, + 182 TokenType.BUCKET, + 183 TokenType.CACHE, + 184 TokenType.CASCADE, + 185 TokenType.COLLATE, + 186 TokenType.COMMAND, + 187 TokenType.COMMENT, + 188 TokenType.COMMIT, + 189 TokenType.COMPOUND, + 190 TokenType.CONSTRAINT, + 191 TokenType.CURRENT_TIME, + 192 TokenType.DEFAULT, + 193 TokenType.DELETE, + 194 TokenType.DESCRIBE, + 195 TokenType.DIV, + 196 TokenType.END, + 197 TokenType.EXECUTE, + 198 TokenType.ESCAPE, + 199 TokenType.FALSE, + 200 TokenType.FIRST, + 201 TokenType.FILTER, + 202 TokenType.FOLLOWING, + 203 TokenType.FORMAT, + 204 TokenType.IF, + 205 TokenType.ISNULL, + 206 TokenType.INTERVAL, + 207 TokenType.LAZY, + 208 TokenType.LEADING, + 209 TokenType.LEFT, + 210 TokenType.LOCAL, + 211 TokenType.MATERIALIZED, + 212 TokenType.MERGE, + 213 TokenType.NATURAL, + 214 TokenType.NEXT, + 215 TokenType.OFFSET, + 216 TokenType.ONLY, + 217 TokenType.OPTIONS, + 218 TokenType.ORDINALITY, + 219 TokenType.PERCENT, + 220 TokenType.PIVOT, + 221 TokenType.PRECEDING, + 222 TokenType.RANGE, + 223 TokenType.REFERENCES, + 224 TokenType.RIGHT, + 225 TokenType.ROW, + 226 TokenType.ROWS, + 227 TokenType.SEED, + 228 TokenType.SEMI, + 229 TokenType.SET, + 230 TokenType.SHOW, + 231 TokenType.SORTKEY, + 232 TokenType.TEMPORARY, + 233 TokenType.TOP, + 234 TokenType.TRAILING, + 235 TokenType.TRUE, + 236 TokenType.UNBOUNDED, + 237 TokenType.UNIQUE, + 238 TokenType.UNLOGGED, + 239 TokenType.UNPIVOT, + 240 TokenType.VOLATILE, + 241 TokenType.WINDOW, + 242 *CREATABLES, + 243 *SUBQUERY_PREDICATES, + 244 *TYPE_TOKENS, + 245 *NO_PAREN_FUNCTIONS, + 246 } + 247 + 248 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { + 249 TokenType.APPLY, + 250 TokenType.LEFT, + 251 TokenType.NATURAL, + 252 TokenType.OFFSET, + 253 TokenType.RIGHT, + 254 TokenType.WINDOW, + 255 } + 256 + 257 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} + 258 + 259 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} + 260 + 261 FUNC_TOKENS = { + 262 TokenType.COMMAND, + 263 TokenType.CURRENT_DATE, + 264 TokenType.CURRENT_DATETIME, + 265 TokenType.CURRENT_TIMESTAMP, + 266 TokenType.CURRENT_TIME, + 267 TokenType.FILTER, + 268 TokenType.FIRST, + 269 TokenType.FORMAT, + 270 TokenType.IDENTIFIER, + 271 TokenType.INDEX, + 272 TokenType.ISNULL, + 273 TokenType.ILIKE, + 274 TokenType.LIKE, + 275 TokenType.MERGE, + 276 TokenType.OFFSET, + 277 TokenType.PRIMARY_KEY, + 278 TokenType.REPLACE, + 279 TokenType.ROW, + 280 TokenType.UNNEST, + 281 TokenType.VAR, + 282 TokenType.LEFT, + 283 TokenType.RIGHT, + 284 TokenType.DATE, + 285 TokenType.DATETIME, + 286 TokenType.TABLE, + 287 TokenType.TIMESTAMP, + 288 TokenType.TIMESTAMPTZ, + 289 TokenType.WINDOW, + 290 *TYPE_TOKENS, + 291 *SUBQUERY_PREDICATES, + 292 } + 293 + 294 CONJUNCTION = { + 295 TokenType.AND: exp.And, + 296 TokenType.OR: exp.Or, + 297 } + 298 + 299 EQUALITY = { + 300 TokenType.EQ: exp.EQ, + 301 TokenType.NEQ: exp.NEQ, + 302 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, + 303 } + 304 + 305 COMPARISON = { + 306 TokenType.GT: exp.GT, + 307 TokenType.GTE: exp.GTE, + 308 TokenType.LT: exp.LT, + 309 TokenType.LTE: exp.LTE, + 310 } + 311 + 312 BITWISE = { + 313 TokenType.AMP: exp.BitwiseAnd, + 314 TokenType.CARET: exp.BitwiseXor, + 315 TokenType.PIPE: exp.BitwiseOr, + 316 TokenType.DPIPE: exp.DPipe, + 317 } + 318 + 319 TERM = { + 320 TokenType.DASH: exp.Sub, + 321 TokenType.PLUS: exp.Add, + 322 TokenType.MOD: exp.Mod, + 323 TokenType.COLLATE: exp.Collate, + 324 } + 325 + 326 FACTOR = { + 327 TokenType.DIV: exp.IntDiv, + 328 TokenType.LR_ARROW: exp.Distance, + 329 TokenType.SLASH: exp.Div, + 330 TokenType.STAR: exp.Mul, + 331 } + 332 + 333 TIMESTAMPS = { + 334 TokenType.TIME, + 335 TokenType.TIMESTAMP, + 336 TokenType.TIMESTAMPTZ, + 337 TokenType.TIMESTAMPLTZ, + 338 } + 339 + 340 SET_OPERATIONS = { + 341 TokenType.UNION, + 342 TokenType.INTERSECT, + 343 TokenType.EXCEPT, + 344 } + 345 + 346 JOIN_SIDES = { + 347 TokenType.LEFT, + 348 TokenType.RIGHT, + 349 TokenType.FULL, + 350 } + 351 + 352 JOIN_KINDS = { + 353 TokenType.INNER, + 354 TokenType.OUTER, + 355 TokenType.CROSS, + 356 TokenType.SEMI, + 357 TokenType.ANTI, + 358 } + 359 + 360 LAMBDAS = { + 361 TokenType.ARROW: lambda self, expressions: self.expression( + 362 exp.Lambda, + 363 this=self._parse_conjunction().transform( + 364 self._replace_lambda, {node.name for node in expressions} + 365 ), + 366 expressions=expressions, + 367 ), + 368 TokenType.FARROW: lambda self, expressions: self.expression( + 369 exp.Kwarg, + 370 this=exp.Var(this=expressions[0].name), + 371 expression=self._parse_conjunction(), + 372 ), + 373 } + 374 + 375 COLUMN_OPERATORS = { + 376 TokenType.DOT: None, + 377 TokenType.DCOLON: lambda self, this, to: self.expression( + 378 exp.Cast, 379 this=this, - 380 expression=path, + 380 to=to, 381 ), - 382 TokenType.HASH_ARROW: lambda self, this, path: self.expression( - 383 exp.JSONBExtract, + 382 TokenType.ARROW: lambda self, this, path: self.expression( + 383 exp.JSONExtract, 384 this=this, 385 expression=path, 386 ), - 387 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( - 388 exp.JSONBExtractScalar, + 387 TokenType.DARROW: lambda self, this, path: self.expression( + 388 exp.JSONExtractScalar, 389 this=this, 390 expression=path, 391 ), - 392 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( - 393 exp.JSONBContains, + 392 TokenType.HASH_ARROW: lambda self, this, path: self.expression( + 393 exp.JSONBExtract, 394 this=this, - 395 expression=key, + 395 expression=path, 396 ), - 397 } - 398 - 399 EXPRESSION_PARSERS = { - 400 exp.Column: lambda self: self._parse_column(), - 401 exp.DataType: lambda self: self._parse_types(), - 402 exp.From: lambda self: self._parse_from(), - 403 exp.Group: lambda self: self._parse_group(), - 404 exp.Identifier: lambda self: self._parse_id_var(), - 405 exp.Lateral: lambda self: self._parse_lateral(), - 406 exp.Join: lambda self: self._parse_join(), - 407 exp.Order: lambda self: self._parse_order(), - 408 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), - 409 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), - 410 exp.Lambda: lambda self: self._parse_lambda(), - 411 exp.Limit: lambda self: self._parse_limit(), - 412 exp.Offset: lambda self: self._parse_offset(), - 413 exp.TableAlias: lambda self: self._parse_table_alias(), - 414 exp.Table: lambda self: self._parse_table(), - 415 exp.Condition: lambda self: self._parse_conjunction(), - 416 exp.Expression: lambda self: self._parse_statement(), - 417 exp.Properties: lambda self: self._parse_properties(), - 418 exp.Where: lambda self: self._parse_where(), - 419 exp.Ordered: lambda self: self._parse_ordered(), - 420 exp.Having: lambda self: self._parse_having(), - 421 exp.With: lambda self: self._parse_with(), - 422 exp.Window: lambda self: self._parse_named_window(), - 423 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), - 424 } - 425 - 426 STATEMENT_PARSERS = { - 427 TokenType.ALTER: lambda self: self._parse_alter(), - 428 TokenType.BEGIN: lambda self: self._parse_transaction(), - 429 TokenType.CACHE: lambda self: self._parse_cache(), - 430 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), - 431 TokenType.CREATE: lambda self: self._parse_create(), - 432 TokenType.DELETE: lambda self: self._parse_delete(), - 433 TokenType.DESC: lambda self: self._parse_describe(), - 434 TokenType.DESCRIBE: lambda self: self._parse_describe(), - 435 TokenType.DROP: lambda self: self._parse_drop(), - 436 TokenType.END: lambda self: self._parse_commit_or_rollback(), - 437 TokenType.INSERT: lambda self: self._parse_insert(), - 438 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), - 439 TokenType.MERGE: lambda self: self._parse_merge(), - 440 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), - 441 TokenType.UNCACHE: lambda self: self._parse_uncache(), - 442 TokenType.UPDATE: lambda self: self._parse_update(), - 443 TokenType.USE: lambda self: self.expression( - 444 exp.Use, - 445 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) - 446 and exp.Var(this=self._prev.text), - 447 this=self._parse_table(schema=False), - 448 ), - 449 } - 450 - 451 UNARY_PARSERS = { - 452 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op - 453 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), - 454 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), - 455 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), - 456 } - 457 - 458 PRIMARY_PARSERS = { - 459 TokenType.STRING: lambda self, token: self.expression( - 460 exp.Literal, this=token.text, is_string=True - 461 ), - 462 TokenType.NUMBER: lambda self, token: self.expression( - 463 exp.Literal, this=token.text, is_string=False - 464 ), - 465 TokenType.STAR: lambda self, _: self.expression( - 466 exp.Star, - 467 **{"except": self._parse_except(), "replace": self._parse_replace()}, - 468 ), - 469 TokenType.NULL: lambda self, _: self.expression(exp.Null), - 470 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), - 471 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), - 472 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), - 473 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), - 474 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), - 475 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), - 476 TokenType.NATIONAL: lambda self, token: self._parse_national(token), - 477 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), - 478 } - 479 - 480 PLACEHOLDER_PARSERS = { - 481 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), - 482 TokenType.PARAMETER: lambda self: self._parse_parameter(), - 483 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) - 484 if self._match_set((TokenType.NUMBER, TokenType.VAR)) - 485 else None, - 486 } - 487 - 488 RANGE_PARSERS = { - 489 TokenType.BETWEEN: lambda self, this: self._parse_between(this), - 490 TokenType.GLOB: lambda self, this: self._parse_escape( - 491 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) - 492 ), - 493 TokenType.IN: lambda self, this: self._parse_in(this), - 494 TokenType.IS: lambda self, this: self._parse_is(this), - 495 TokenType.LIKE: lambda self, this: self._parse_escape( - 496 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) - 497 ), - 498 TokenType.ILIKE: lambda self, this: self._parse_escape( - 499 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) - 500 ), - 501 TokenType.IRLIKE: lambda self, this: self.expression( - 502 exp.RegexpILike, this=this, expression=self._parse_bitwise() + 397 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( + 398 exp.JSONBExtractScalar, + 399 this=this, + 400 expression=path, + 401 ), + 402 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( + 403 exp.JSONBContains, + 404 this=this, + 405 expression=key, + 406 ), + 407 } + 408 + 409 EXPRESSION_PARSERS = { + 410 exp.Column: lambda self: self._parse_column(), + 411 exp.DataType: lambda self: self._parse_types(), + 412 exp.From: lambda self: self._parse_from(), + 413 exp.Group: lambda self: self._parse_group(), + 414 exp.Identifier: lambda self: self._parse_id_var(), + 415 exp.Lateral: lambda self: self._parse_lateral(), + 416 exp.Join: lambda self: self._parse_join(), + 417 exp.Order: lambda self: self._parse_order(), + 418 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), + 419 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), + 420 exp.Lambda: lambda self: self._parse_lambda(), + 421 exp.Limit: lambda self: self._parse_limit(), + 422 exp.Offset: lambda self: self._parse_offset(), + 423 exp.TableAlias: lambda self: self._parse_table_alias(), + 424 exp.Table: lambda self: self._parse_table(), + 425 exp.Condition: lambda self: self._parse_conjunction(), + 426 exp.Expression: lambda self: self._parse_statement(), + 427 exp.Properties: lambda self: self._parse_properties(), + 428 exp.Where: lambda self: self._parse_where(), + 429 exp.Ordered: lambda self: self._parse_ordered(), + 430 exp.Having: lambda self: self._parse_having(), + 431 exp.With: lambda self: self._parse_with(), + 432 exp.Window: lambda self: self._parse_named_window(), + 433 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), + 434 } + 435 + 436 STATEMENT_PARSERS = { + 437 TokenType.ALTER: lambda self: self._parse_alter(), + 438 TokenType.BEGIN: lambda self: self._parse_transaction(), + 439 TokenType.CACHE: lambda self: self._parse_cache(), + 440 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), + 441 TokenType.COMMENT: lambda self: self._parse_comment(), + 442 TokenType.CREATE: lambda self: self._parse_create(), + 443 TokenType.DELETE: lambda self: self._parse_delete(), + 444 TokenType.DESC: lambda self: self._parse_describe(), + 445 TokenType.DESCRIBE: lambda self: self._parse_describe(), + 446 TokenType.DROP: lambda self: self._parse_drop(), + 447 TokenType.END: lambda self: self._parse_commit_or_rollback(), + 448 TokenType.INSERT: lambda self: self._parse_insert(), + 449 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), + 450 TokenType.MERGE: lambda self: self._parse_merge(), + 451 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), + 452 TokenType.UNCACHE: lambda self: self._parse_uncache(), + 453 TokenType.UPDATE: lambda self: self._parse_update(), + 454 TokenType.USE: lambda self: self.expression( + 455 exp.Use, + 456 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) + 457 and exp.Var(this=self._prev.text), + 458 this=self._parse_table(schema=False), + 459 ), + 460 } + 461 + 462 UNARY_PARSERS = { + 463 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op + 464 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), + 465 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), + 466 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), + 467 } + 468 + 469 PRIMARY_PARSERS = { + 470 TokenType.STRING: lambda self, token: self.expression( + 471 exp.Literal, this=token.text, is_string=True + 472 ), + 473 TokenType.NUMBER: lambda self, token: self.expression( + 474 exp.Literal, this=token.text, is_string=False + 475 ), + 476 TokenType.STAR: lambda self, _: self.expression( + 477 exp.Star, + 478 **{"except": self._parse_except(), "replace": self._parse_replace()}, + 479 ), + 480 TokenType.NULL: lambda self, _: self.expression(exp.Null), + 481 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), + 482 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), + 483 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), + 484 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), + 485 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), + 486 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), + 487 TokenType.NATIONAL: lambda self, token: self._parse_national(token), + 488 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), + 489 } + 490 + 491 PLACEHOLDER_PARSERS = { + 492 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), + 493 TokenType.PARAMETER: lambda self: self._parse_parameter(), + 494 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) + 495 if self._match_set((TokenType.NUMBER, TokenType.VAR)) + 496 else None, + 497 } + 498 + 499 RANGE_PARSERS = { + 500 TokenType.BETWEEN: lambda self, this: self._parse_between(this), + 501 TokenType.GLOB: lambda self, this: self._parse_escape( + 502 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 503 ), - 504 TokenType.RLIKE: lambda self, this: self.expression( - 505 exp.RegexpLike, this=this, expression=self._parse_bitwise() + 504 TokenType.OVERLAPS: lambda self, this: self._parse_escape( + 505 self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) 506 ), - 507 TokenType.SIMILAR_TO: lambda self, this: self.expression( - 508 exp.SimilarTo, this=this, expression=self._parse_bitwise() - 509 ), - 510 } - 511 - 512 PROPERTY_PARSERS = { - 513 "AFTER": lambda self: self._parse_afterjournal( - 514 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" - 515 ), - 516 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), - 517 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), - 518 "BEFORE": lambda self: self._parse_journal( - 519 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 507 TokenType.IN: lambda self, this: self._parse_in(this), + 508 TokenType.IS: lambda self, this: self._parse_is(this), + 509 TokenType.LIKE: lambda self, this: self._parse_escape( + 510 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) + 511 ), + 512 TokenType.ILIKE: lambda self, this: self._parse_escape( + 513 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) + 514 ), + 515 TokenType.IRLIKE: lambda self, this: self.expression( + 516 exp.RegexpILike, this=this, expression=self._parse_bitwise() + 517 ), + 518 TokenType.RLIKE: lambda self, this: self.expression( + 519 exp.RegexpLike, this=this, expression=self._parse_bitwise() 520 ), - 521 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), - 522 "CHARACTER SET": lambda self: self._parse_character_set(), - 523 "CHECKSUM": lambda self: self._parse_checksum(), - 524 "CLUSTER BY": lambda self: self.expression( - 525 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) - 526 ), - 527 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), - 528 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), - 529 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( - 530 default=self._prev.text.upper() == "DEFAULT" - 531 ), - 532 "DEFINER": lambda self: self._parse_definer(), - 533 "DETERMINISTIC": lambda self: self.expression( - 534 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") - 535 ), - 536 "DISTKEY": lambda self: self._parse_distkey(), - 537 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), - 538 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), - 539 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), - 540 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), - 541 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), - 542 "FREESPACE": lambda self: self._parse_freespace(), - 543 "GLOBAL": lambda self: self._parse_temporary(global_=True), - 544 "IMMUTABLE": lambda self: self.expression( - 545 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") - 546 ), - 547 "JOURNAL": lambda self: self._parse_journal( - 548 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 521 TokenType.SIMILAR_TO: lambda self, this: self.expression( + 522 exp.SimilarTo, this=this, expression=self._parse_bitwise() + 523 ), + 524 } + 525 + 526 PROPERTY_PARSERS = { + 527 "AFTER": lambda self: self._parse_afterjournal( + 528 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 529 ), + 530 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), + 531 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), + 532 "BEFORE": lambda self: self._parse_journal( + 533 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 534 ), + 535 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), + 536 "CHARACTER SET": lambda self: self._parse_character_set(), + 537 "CHECKSUM": lambda self: self._parse_checksum(), + 538 "CLUSTER BY": lambda self: self.expression( + 539 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) + 540 ), + 541 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), + 542 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), + 543 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( + 544 default=self._prev.text.upper() == "DEFAULT" + 545 ), + 546 "DEFINER": lambda self: self._parse_definer(), + 547 "DETERMINISTIC": lambda self: self.expression( + 548 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 549 ), - 550 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), - 551 "LIKE": lambda self: self._parse_create_like(), - 552 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), - 553 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), - 554 "LOCK": lambda self: self._parse_locking(), - 555 "LOCKING": lambda self: self._parse_locking(), - 556 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), - 557 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), - 558 "MAX": lambda self: self._parse_datablocksize(), - 559 "MAXIMUM": lambda self: self._parse_datablocksize(), - 560 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( - 561 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" - 562 ), - 563 "MIN": lambda self: self._parse_datablocksize(), - 564 "MINIMUM": lambda self: self._parse_datablocksize(), - 565 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), - 566 "NO": lambda self: self._parse_noprimaryindex(), - 567 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), - 568 "ON": lambda self: self._parse_oncommit(), - 569 "PARTITION BY": lambda self: self._parse_partitioned_by(), - 570 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), - 571 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), - 572 "RETURNS": lambda self: self._parse_returns(), - 573 "ROW": lambda self: self._parse_row(), - 574 "SET": lambda self: self.expression(exp.SetProperty, multi=False), - 575 "SORTKEY": lambda self: self._parse_sortkey(), - 576 "STABLE": lambda self: self.expression( - 577 exp.VolatilityProperty, this=exp.Literal.string("STABLE") - 578 ), - 579 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), - 580 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), - 581 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), - 582 "TEMPORARY": lambda self: self._parse_temporary(global_=False), - 583 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), - 584 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), - 585 "VOLATILE": lambda self: self.expression( - 586 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") - 587 ), - 588 "WITH": lambda self: self._parse_with_property(), - 589 } - 590 - 591 CONSTRAINT_PARSERS = { - 592 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), - 593 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), - 594 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), - 595 "CHARACTER SET": lambda self: self.expression( - 596 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() - 597 ), - 598 "CHECK": lambda self: self.expression( - 599 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) - 600 ), - 601 "COLLATE": lambda self: self.expression( - 602 exp.CollateColumnConstraint, this=self._parse_var() - 603 ), - 604 "COMMENT": lambda self: self.expression( - 605 exp.CommentColumnConstraint, this=self._parse_string() - 606 ), - 607 "COMPRESS": lambda self: self._parse_compress(), - 608 "DEFAULT": lambda self: self.expression( - 609 exp.DefaultColumnConstraint, this=self._parse_bitwise() - 610 ), - 611 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), - 612 "FOREIGN KEY": lambda self: self._parse_foreign_key(), - 613 "FORMAT": lambda self: self.expression( - 614 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() - 615 ), - 616 "GENERATED": lambda self: self._parse_generated_as_identity(), - 617 "IDENTITY": lambda self: self._parse_auto_increment(), - 618 "INLINE": lambda self: self._parse_inline(), - 619 "LIKE": lambda self: self._parse_create_like(), - 620 "NOT": lambda self: self._parse_not_constraint(), - 621 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), - 622 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), - 623 "PRIMARY KEY": lambda self: self._parse_primary_key(), - 624 "TITLE": lambda self: self.expression( - 625 exp.TitleColumnConstraint, this=self._parse_var_or_string() - 626 ), - 627 "UNIQUE": lambda self: self._parse_unique(), - 628 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), - 629 } - 630 - 631 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} - 632 - 633 NO_PAREN_FUNCTION_PARSERS = { - 634 TokenType.CASE: lambda self: self._parse_case(), - 635 TokenType.IF: lambda self: self._parse_if(), - 636 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), - 637 } - 638 - 639 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { - 640 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), - 641 "TRY_CONVERT": lambda self: self._parse_convert(False), - 642 "EXTRACT": lambda self: self._parse_extract(), - 643 "POSITION": lambda self: self._parse_position(), - 644 "SUBSTRING": lambda self: self._parse_substring(), - 645 "TRIM": lambda self: self._parse_trim(), - 646 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), - 647 "TRY_CAST": lambda self: self._parse_cast(False), - 648 "STRING_AGG": lambda self: self._parse_string_agg(), - 649 } - 650 - 651 QUERY_MODIFIER_PARSERS = { - 652 "match": lambda self: self._parse_match_recognize(), - 653 "where": lambda self: self._parse_where(), - 654 "group": lambda self: self._parse_group(), - 655 "having": lambda self: self._parse_having(), - 656 "qualify": lambda self: self._parse_qualify(), - 657 "windows": lambda self: self._parse_window_clause(), - 658 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), - 659 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), - 660 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), - 661 "order": lambda self: self._parse_order(), - 662 "limit": lambda self: self._parse_limit(), - 663 "offset": lambda self: self._parse_offset(), - 664 "lock": lambda self: self._parse_lock(), - 665 } - 666 - 667 SHOW_PARSERS: t.Dict[str, t.Callable] = {} - 668 SET_PARSERS: t.Dict[str, t.Callable] = {} - 669 - 670 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) - 671 - 672 CREATABLES = { - 673 TokenType.COLUMN, - 674 TokenType.FUNCTION, - 675 TokenType.INDEX, - 676 TokenType.PROCEDURE, - 677 TokenType.SCHEMA, - 678 TokenType.TABLE, - 679 TokenType.VIEW, - 680 } - 681 - 682 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} - 683 - 684 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} - 685 - 686 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} - 687 - 688 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} - 689 - 690 STRICT_CAST = True + 550 "DISTKEY": lambda self: self._parse_distkey(), + 551 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), + 552 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), + 553 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), + 554 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), + 555 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), + 556 "FREESPACE": lambda self: self._parse_freespace(), + 557 "GLOBAL": lambda self: self._parse_temporary(global_=True), + 558 "IMMUTABLE": lambda self: self.expression( + 559 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") + 560 ), + 561 "JOURNAL": lambda self: self._parse_journal( + 562 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 563 ), + 564 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), + 565 "LIKE": lambda self: self._parse_create_like(), + 566 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), + 567 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), + 568 "LOCK": lambda self: self._parse_locking(), + 569 "LOCKING": lambda self: self._parse_locking(), + 570 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), + 571 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), + 572 "MAX": lambda self: self._parse_datablocksize(), + 573 "MAXIMUM": lambda self: self._parse_datablocksize(), + 574 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( + 575 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" + 576 ), + 577 "MIN": lambda self: self._parse_datablocksize(), + 578 "MINIMUM": lambda self: self._parse_datablocksize(), + 579 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), + 580 "NO": lambda self: self._parse_noprimaryindex(), + 581 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), + 582 "ON": lambda self: self._parse_oncommit(), + 583 "PARTITION BY": lambda self: self._parse_partitioned_by(), + 584 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), + 585 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), + 586 "RETURNS": lambda self: self._parse_returns(), + 587 "ROW": lambda self: self._parse_row(), + 588 "SET": lambda self: self.expression(exp.SetProperty, multi=False), + 589 "SORTKEY": lambda self: self._parse_sortkey(), + 590 "STABLE": lambda self: self.expression( + 591 exp.VolatilityProperty, this=exp.Literal.string("STABLE") + 592 ), + 593 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), + 594 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), + 595 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), + 596 "TEMPORARY": lambda self: self._parse_temporary(global_=False), + 597 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), + 598 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), + 599 "VOLATILE": lambda self: self.expression( + 600 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") + 601 ), + 602 "WITH": lambda self: self._parse_with_property(), + 603 } + 604 + 605 CONSTRAINT_PARSERS = { + 606 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), + 607 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), + 608 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), + 609 "CHARACTER SET": lambda self: self.expression( + 610 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() + 611 ), + 612 "CHECK": lambda self: self.expression( + 613 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) + 614 ), + 615 "COLLATE": lambda self: self.expression( + 616 exp.CollateColumnConstraint, this=self._parse_var() + 617 ), + 618 "COMMENT": lambda self: self.expression( + 619 exp.CommentColumnConstraint, this=self._parse_string() + 620 ), + 621 "COMPRESS": lambda self: self._parse_compress(), + 622 "DEFAULT": lambda self: self.expression( + 623 exp.DefaultColumnConstraint, this=self._parse_bitwise() + 624 ), + 625 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), + 626 "FOREIGN KEY": lambda self: self._parse_foreign_key(), + 627 "FORMAT": lambda self: self.expression( + 628 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() + 629 ), + 630 "GENERATED": lambda self: self._parse_generated_as_identity(), + 631 "IDENTITY": lambda self: self._parse_auto_increment(), + 632 "INLINE": lambda self: self._parse_inline(), + 633 "LIKE": lambda self: self._parse_create_like(), + 634 "NOT": lambda self: self._parse_not_constraint(), + 635 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), + 636 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), + 637 "PRIMARY KEY": lambda self: self._parse_primary_key(), + 638 "TITLE": lambda self: self.expression( + 639 exp.TitleColumnConstraint, this=self._parse_var_or_string() + 640 ), + 641 "UNIQUE": lambda self: self._parse_unique(), + 642 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), + 643 } + 644 + 645 ALTER_PARSERS = { + 646 "ADD": lambda self: self._parse_alter_table_add(), + 647 "ALTER": lambda self: self._parse_alter_table_alter(), + 648 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), + 649 "DROP": lambda self: self._parse_alter_table_drop(), + 650 "RENAME": lambda self: self._parse_alter_table_rename(), + 651 } + 652 + 653 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} + 654 + 655 NO_PAREN_FUNCTION_PARSERS = { + 656 TokenType.CASE: lambda self: self._parse_case(), + 657 TokenType.IF: lambda self: self._parse_if(), + 658 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), + 659 } + 660 + 661 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { + 662 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), + 663 "TRY_CONVERT": lambda self: self._parse_convert(False), + 664 "EXTRACT": lambda self: self._parse_extract(), + 665 "POSITION": lambda self: self._parse_position(), + 666 "SUBSTRING": lambda self: self._parse_substring(), + 667 "TRIM": lambda self: self._parse_trim(), + 668 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), + 669 "TRY_CAST": lambda self: self._parse_cast(False), + 670 "STRING_AGG": lambda self: self._parse_string_agg(), + 671 } + 672 + 673 QUERY_MODIFIER_PARSERS = { + 674 "match": lambda self: self._parse_match_recognize(), + 675 "where": lambda self: self._parse_where(), + 676 "group": lambda self: self._parse_group(), + 677 "having": lambda self: self._parse_having(), + 678 "qualify": lambda self: self._parse_qualify(), + 679 "windows": lambda self: self._parse_window_clause(), + 680 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), + 681 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), + 682 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), + 683 "order": lambda self: self._parse_order(), + 684 "limit": lambda self: self._parse_limit(), + 685 "offset": lambda self: self._parse_offset(), + 686 "lock": lambda self: self._parse_lock(), + 687 } + 688 + 689 SHOW_PARSERS: t.Dict[str, t.Callable] = {} + 690 SET_PARSERS: t.Dict[str, t.Callable] = {} 691 - 692 __slots__ = ( - 693 "error_level", - 694 "error_message_context", - 695 "sql", - 696 "errors", - 697 "index_offset", - 698 "unnest_column_only", - 699 "alias_post_tablesample", - 700 "max_errors", - 701 "null_ordering", - 702 "_tokens", - 703 "_index", - 704 "_curr", - 705 "_next", - 706 "_prev", - 707 "_prev_comments", - 708 "_show_trie", - 709 "_set_trie", - 710 ) - 711 - 712 def __init__( - 713 self, - 714 error_level: t.Optional[ErrorLevel] = None, - 715 error_message_context: int = 100, - 716 index_offset: int = 0, - 717 unnest_column_only: bool = False, - 718 alias_post_tablesample: bool = False, - 719 max_errors: int = 3, - 720 null_ordering: t.Optional[str] = None, - 721 ): - 722 self.error_level = error_level or ErrorLevel.IMMEDIATE - 723 self.error_message_context = error_message_context - 724 self.index_offset = index_offset - 725 self.unnest_column_only = unnest_column_only - 726 self.alias_post_tablesample = alias_post_tablesample - 727 self.max_errors = max_errors - 728 self.null_ordering = null_ordering - 729 self.reset() - 730 - 731 def reset(self): - 732 self.sql = "" - 733 self.errors = [] - 734 self._tokens = [] - 735 self._index = 0 - 736 self._curr = None - 737 self._next = None - 738 self._prev = None - 739 self._prev_comments = None - 740 - 741 def parse( - 742 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None - 743 ) -> t.List[t.Optional[exp.Expression]]: - 744 """ - 745 Parses a list of tokens and returns a list of syntax trees, one tree - 746 per parsed SQL statement. - 747 - 748 Args: - 749 raw_tokens: the list of tokens. - 750 sql: the original SQL string, used to produce helpful debug messages. - 751 - 752 Returns: - 753 The list of syntax trees. - 754 """ - 755 return self._parse( - 756 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql - 757 ) - 758 - 759 def parse_into( - 760 self, - 761 expression_types: exp.IntoType, - 762 raw_tokens: t.List[Token], - 763 sql: t.Optional[str] = None, - 764 ) -> t.List[t.Optional[exp.Expression]]: - 765 """ - 766 Parses a list of tokens into a given Expression type. If a collection of Expression - 767 types is given instead, this method will try to parse the token list into each one - 768 of them, stopping at the first for which the parsing succeeds. - 769 - 770 Args: - 771 expression_types: the expression type(s) to try and parse the token list into. - 772 raw_tokens: the list of tokens. - 773 sql: the original SQL string, used to produce helpful debug messages. - 774 - 775 Returns: - 776 The target Expression. - 777 """ - 778 errors = [] - 779 for expression_type in ensure_collection(expression_types): - 780 parser = self.EXPRESSION_PARSERS.get(expression_type) - 781 if not parser: - 782 raise TypeError(f"No parser registered for {expression_type}") - 783 try: - 784 return self._parse(parser, raw_tokens, sql) - 785 except ParseError as e: - 786 e.errors[0]["into_expression"] = expression_type - 787 errors.append(e) - 788 raise ParseError( - 789 f"Failed to parse into {expression_types}", - 790 errors=merge_errors(errors), - 791 ) from errors[-1] - 792 - 793 def _parse( - 794 self, - 795 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], - 796 raw_tokens: t.List[Token], - 797 sql: t.Optional[str] = None, - 798 ) -> t.List[t.Optional[exp.Expression]]: - 799 self.reset() - 800 self.sql = sql or "" - 801 total = len(raw_tokens) - 802 chunks: t.List[t.List[Token]] = [[]] - 803 - 804 for i, token in enumerate(raw_tokens): - 805 if token.token_type == TokenType.SEMICOLON: - 806 if i < total - 1: - 807 chunks.append([]) - 808 else: - 809 chunks[-1].append(token) - 810 - 811 expressions = [] - 812 - 813 for tokens in chunks: - 814 self._index = -1 - 815 self._tokens = tokens - 816 self._advance() - 817 - 818 expressions.append(parse_method(self)) - 819 - 820 if self._index < len(self._tokens): - 821 self.raise_error("Invalid expression / Unexpected token") + 692 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) + 693 + 694 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} + 695 + 696 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} + 697 + 698 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} + 699 + 700 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} + 701 + 702 STRICT_CAST = True + 703 + 704 __slots__ = ( + 705 "error_level", + 706 "error_message_context", + 707 "sql", + 708 "errors", + 709 "index_offset", + 710 "unnest_column_only", + 711 "alias_post_tablesample", + 712 "max_errors", + 713 "null_ordering", + 714 "_tokens", + 715 "_index", + 716 "_curr", + 717 "_next", + 718 "_prev", + 719 "_prev_comments", + 720 "_show_trie", + 721 "_set_trie", + 722 ) + 723 + 724 def __init__( + 725 self, + 726 error_level: t.Optional[ErrorLevel] = None, + 727 error_message_context: int = 100, + 728 index_offset: int = 0, + 729 unnest_column_only: bool = False, + 730 alias_post_tablesample: bool = False, + 731 max_errors: int = 3, + 732 null_ordering: t.Optional[str] = None, + 733 ): + 734 self.error_level = error_level or ErrorLevel.IMMEDIATE + 735 self.error_message_context = error_message_context + 736 self.index_offset = index_offset + 737 self.unnest_column_only = unnest_column_only + 738 self.alias_post_tablesample = alias_post_tablesample + 739 self.max_errors = max_errors + 740 self.null_ordering = null_ordering + 741 self.reset() + 742 + 743 def reset(self): + 744 self.sql = "" + 745 self.errors = [] + 746 self._tokens = [] + 747 self._index = 0 + 748 self._curr = None + 749 self._next = None + 750 self._prev = None + 751 self._prev_comments = None + 752 + 753 def parse( + 754 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None + 755 ) -> t.List[t.Optional[exp.Expression]]: + 756 """ + 757 Parses a list of tokens and returns a list of syntax trees, one tree + 758 per parsed SQL statement. + 759 + 760 Args: + 761 raw_tokens: the list of tokens. + 762 sql: the original SQL string, used to produce helpful debug messages. + 763 + 764 Returns: + 765 The list of syntax trees. + 766 """ + 767 return self._parse( + 768 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql + 769 ) + 770 + 771 def parse_into( + 772 self, + 773 expression_types: exp.IntoType, + 774 raw_tokens: t.List[Token], + 775 sql: t.Optional[str] = None, + 776 ) -> t.List[t.Optional[exp.Expression]]: + 777 """ + 778 Parses a list of tokens into a given Expression type. If a collection of Expression + 779 types is given instead, this method will try to parse the token list into each one + 780 of them, stopping at the first for which the parsing succeeds. + 781 + 782 Args: + 783 expression_types: the expression type(s) to try and parse the token list into. + 784 raw_tokens: the list of tokens. + 785 sql: the original SQL string, used to produce helpful debug messages. + 786 + 787 Returns: + 788 The target Expression. + 789 """ + 790 errors = [] + 791 for expression_type in ensure_collection(expression_types): + 792 parser = self.EXPRESSION_PARSERS.get(expression_type) + 793 if not parser: + 794 raise TypeError(f"No parser registered for {expression_type}") + 795 try: + 796 return self._parse(parser, raw_tokens, sql) + 797 except ParseError as e: + 798 e.errors[0]["into_expression"] = expression_type + 799 errors.append(e) + 800 raise ParseError( + 801 f"Failed to parse into {expression_types}", + 802 errors=merge_errors(errors), + 803 ) from errors[-1] + 804 + 805 def _parse( + 806 self, + 807 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], + 808 raw_tokens: t.List[Token], + 809 sql: t.Optional[str] = None, + 810 ) -> t.List[t.Optional[exp.Expression]]: + 811 self.reset() + 812 self.sql = sql or "" + 813 total = len(raw_tokens) + 814 chunks: t.List[t.List[Token]] = [[]] + 815 + 816 for i, token in enumerate(raw_tokens): + 817 if token.token_type == TokenType.SEMICOLON: + 818 if i < total - 1: + 819 chunks.append([]) + 820 else: + 821 chunks[-1].append(token) 822 - 823 self.check_errors() + 823 expressions = [] 824 - 825 return expressions - 826 - 827 def check_errors(self) -> None: - 828 """ - 829 Logs or raises any found errors, depending on the chosen error level setting. - 830 """ - 831 if self.error_level == ErrorLevel.WARN: - 832 for error in self.errors: - 833 logger.error(str(error)) - 834 elif self.error_level == ErrorLevel.RAISE and self.errors: - 835 raise ParseError( - 836 concat_messages(self.errors, self.max_errors), - 837 errors=merge_errors(self.errors), - 838 ) - 839 - 840 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: - 841 """ - 842 Appends an error in the list of recorded errors or raises it, depending on the chosen - 843 error level setting. - 844 """ - 845 token = token or self._curr or self._prev or Token.string("") - 846 start = self._find_token(token) - 847 end = start + len(token.text) - 848 start_context = self.sql[max(start - self.error_message_context, 0) : start] - 849 highlight = self.sql[start:end] - 850 end_context = self.sql[end : end + self.error_message_context] + 825 for tokens in chunks: + 826 self._index = -1 + 827 self._tokens = tokens + 828 self._advance() + 829 + 830 expressions.append(parse_method(self)) + 831 + 832 if self._index < len(self._tokens): + 833 self.raise_error("Invalid expression / Unexpected token") + 834 + 835 self.check_errors() + 836 + 837 return expressions + 838 + 839 def check_errors(self) -> None: + 840 """ + 841 Logs or raises any found errors, depending on the chosen error level setting. + 842 """ + 843 if self.error_level == ErrorLevel.WARN: + 844 for error in self.errors: + 845 logger.error(str(error)) + 846 elif self.error_level == ErrorLevel.RAISE and self.errors: + 847 raise ParseError( + 848 concat_messages(self.errors, self.max_errors), + 849 errors=merge_errors(self.errors), + 850 ) 851 - 852 error = ParseError.new( - 853 f"{message}. Line {token.line}, Col: {token.col}.\n" - 854 f" {start_context}\033[4m{highlight}\033[0m{end_context}", - 855 description=message, - 856 line=token.line, - 857 col=token.col, - 858 start_context=start_context, - 859 highlight=highlight, - 860 end_context=end_context, - 861 ) - 862 - 863 if self.error_level == ErrorLevel.IMMEDIATE: - 864 raise error - 865 - 866 self.errors.append(error) - 867 - 868 def expression( - 869 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs - 870 ) -> exp.Expression: - 871 """ - 872 Creates a new, validated Expression. - 873 - 874 Args: - 875 exp_class: the expression class to instantiate. - 876 comments: an optional list of comments to attach to the expression. - 877 kwargs: the arguments to set for the expression along with their respective values. - 878 - 879 Returns: - 880 The target expression. - 881 """ - 882 instance = exp_class(**kwargs) - 883 if self._prev_comments: - 884 instance.comments = self._prev_comments - 885 self._prev_comments = None - 886 if comments: - 887 instance.comments = comments - 888 self.validate_expression(instance) - 889 return instance + 852 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: + 853 """ + 854 Appends an error in the list of recorded errors or raises it, depending on the chosen + 855 error level setting. + 856 """ + 857 token = token or self._curr or self._prev or Token.string("") + 858 start = self._find_token(token) + 859 end = start + len(token.text) + 860 start_context = self.sql[max(start - self.error_message_context, 0) : start] + 861 highlight = self.sql[start:end] + 862 end_context = self.sql[end : end + self.error_message_context] + 863 + 864 error = ParseError.new( + 865 f"{message}. Line {token.line}, Col: {token.col}.\n" + 866 f" {start_context}\033[4m{highlight}\033[0m{end_context}", + 867 description=message, + 868 line=token.line, + 869 col=token.col, + 870 start_context=start_context, + 871 highlight=highlight, + 872 end_context=end_context, + 873 ) + 874 + 875 if self.error_level == ErrorLevel.IMMEDIATE: + 876 raise error + 877 + 878 self.errors.append(error) + 879 + 880 def expression( + 881 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs + 882 ) -> exp.Expression: + 883 """ + 884 Creates a new, validated Expression. + 885 + 886 Args: + 887 exp_class: the expression class to instantiate. + 888 comments: an optional list of comments to attach to the expression. + 889 kwargs: the arguments to set for the expression along with their respective values. 890 - 891 def validate_expression( - 892 self, expression: exp.Expression, args: t.Optional[t.List] = None - 893 ) -> None: - 894 """ - 895 Validates an already instantiated expression, making sure that all its mandatory arguments - 896 are set. - 897 - 898 Args: - 899 expression: the expression to validate. - 900 args: an optional list of items that was used to instantiate the expression, if it's a Func. - 901 """ - 902 if self.error_level == ErrorLevel.IGNORE: - 903 return - 904 - 905 for error_message in expression.error_messages(args): - 906 self.raise_error(error_message) - 907 - 908 def _find_sql(self, start: Token, end: Token) -> str: - 909 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] - 910 - 911 def _find_token(self, token: Token) -> int: - 912 line = 1 - 913 col = 1 - 914 index = 0 - 915 - 916 while line < token.line or col < token.col: - 917 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: - 918 line += 1 - 919 col = 1 - 920 else: - 921 col += 1 - 922 index += 1 - 923 - 924 return index - 925 - 926 def _advance(self, times: int = 1) -> None: - 927 self._index += times - 928 self._curr = seq_get(self._tokens, self._index) - 929 self._next = seq_get(self._tokens, self._index + 1) - 930 if self._index > 0: - 931 self._prev = self._tokens[self._index - 1] - 932 self._prev_comments = self._prev.comments - 933 else: - 934 self._prev = None - 935 self._prev_comments = None - 936 - 937 def _retreat(self, index: int) -> None: - 938 self._advance(index - self._index) - 939 - 940 def _parse_command(self) -> exp.Expression: - 941 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) - 942 - 943 def _parse_statement(self) -> t.Optional[exp.Expression]: - 944 if self._curr is None: - 945 return None - 946 - 947 if self._match_set(self.STATEMENT_PARSERS): - 948 return self.STATEMENT_PARSERS[self._prev.token_type](self) - 949 - 950 if self._match_set(Tokenizer.COMMANDS): - 951 return self._parse_command() - 952 - 953 expression = self._parse_expression() - 954 expression = self._parse_set_operations(expression) if expression else self._parse_select() - 955 - 956 self._parse_query_modifiers(expression) - 957 return expression + 891 Returns: + 892 The target expression. + 893 """ + 894 instance = exp_class(**kwargs) + 895 if self._prev_comments: + 896 instance.comments = self._prev_comments + 897 self._prev_comments = None + 898 if comments: + 899 instance.comments = comments + 900 self.validate_expression(instance) + 901 return instance + 902 + 903 def validate_expression( + 904 self, expression: exp.Expression, args: t.Optional[t.List] = None + 905 ) -> None: + 906 """ + 907 Validates an already instantiated expression, making sure that all its mandatory arguments + 908 are set. + 909 + 910 Args: + 911 expression: the expression to validate. + 912 args: an optional list of items that was used to instantiate the expression, if it's a Func. + 913 """ + 914 if self.error_level == ErrorLevel.IGNORE: + 915 return + 916 + 917 for error_message in expression.error_messages(args): + 918 self.raise_error(error_message) + 919 + 920 def _find_sql(self, start: Token, end: Token) -> str: + 921 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] + 922 + 923 def _find_token(self, token: Token) -> int: + 924 line = 1 + 925 col = 1 + 926 index = 0 + 927 + 928 while line < token.line or col < token.col: + 929 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: + 930 line += 1 + 931 col = 1 + 932 else: + 933 col += 1 + 934 index += 1 + 935 + 936 return index + 937 + 938 def _advance(self, times: int = 1) -> None: + 939 self._index += times + 940 self._curr = seq_get(self._tokens, self._index) + 941 self._next = seq_get(self._tokens, self._index + 1) + 942 if self._index > 0: + 943 self._prev = self._tokens[self._index - 1] + 944 self._prev_comments = self._prev.comments + 945 else: + 946 self._prev = None + 947 self._prev_comments = None + 948 + 949 def _retreat(self, index: int) -> None: + 950 self._advance(index - self._index) + 951 + 952 def _parse_command(self) -> exp.Expression: + 953 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) + 954 + 955 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: + 956 start = self._prev + 957 exists = self._parse_exists() if allow_exists else None 958 - 959 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: - 960 start = self._prev - 961 temporary = self._match(TokenType.TEMPORARY) - 962 materialized = self._match(TokenType.MATERIALIZED) - 963 kind = self._match_set(self.CREATABLES) and self._prev.text - 964 if not kind: - 965 if default_kind: - 966 kind = default_kind - 967 else: - 968 return self._parse_as_command(start) - 969 - 970 return self.expression( - 971 exp.Drop, - 972 exists=self._parse_exists(), - 973 this=self._parse_table(schema=True), - 974 kind=kind, - 975 temporary=temporary, - 976 materialized=materialized, - 977 cascade=self._match(TokenType.CASCADE), - 978 ) - 979 - 980 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: - 981 return ( - 982 self._match(TokenType.IF) - 983 and (not not_ or self._match(TokenType.NOT)) - 984 and self._match(TokenType.EXISTS) - 985 ) - 986 - 987 def _parse_create(self) -> t.Optional[exp.Expression]: - 988 start = self._prev - 989 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( - 990 TokenType.OR, TokenType.REPLACE - 991 ) - 992 unique = self._match(TokenType.UNIQUE) + 959 self._match(TokenType.ON) + 960 + 961 kind = self._match_set(self.CREATABLES) and self._prev + 962 + 963 if not kind: + 964 return self._parse_as_command(start) + 965 + 966 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): + 967 this = self._parse_user_defined_function(kind=kind.token_type) + 968 elif kind.token_type == TokenType.TABLE: + 969 this = self._parse_table() + 970 elif kind.token_type == TokenType.COLUMN: + 971 this = self._parse_column() + 972 else: + 973 this = self._parse_id_var() + 974 + 975 self._match(TokenType.IS) + 976 + 977 return self.expression( + 978 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists + 979 ) + 980 + 981 def _parse_statement(self) -> t.Optional[exp.Expression]: + 982 if self._curr is None: + 983 return None + 984 + 985 if self._match_set(self.STATEMENT_PARSERS): + 986 return self.STATEMENT_PARSERS[self._prev.token_type](self) + 987 + 988 if self._match_set(Tokenizer.COMMANDS): + 989 return self._parse_command() + 990 + 991 expression = self._parse_expression() + 992 expression = self._parse_set_operations(expression) if expression else self._parse_select() 993 - 994 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): - 995 self._match(TokenType.TABLE) + 994 self._parse_query_modifiers(expression) + 995 return expression 996 - 997 properties = None - 998 create_token = self._match_set(self.CREATABLES) and self._prev - 999 -1000 if not create_token: -1001 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE -1002 create_token = self._match_set(self.CREATABLES) and self._prev -1003 -1004 if not properties or not create_token: -1005 return self._parse_as_command(start) -1006 -1007 exists = self._parse_exists(not_=True) -1008 this = None -1009 expression = None -1010 indexes = None -1011 no_schema_binding = None -1012 begin = None -1013 -1014 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): -1015 this = self._parse_user_defined_function(kind=create_token.token_type) -1016 temp_properties = self._parse_properties() -1017 if properties and temp_properties: -1018 properties.expressions.extend(temp_properties.expressions) -1019 elif temp_properties: -1020 properties = temp_properties -1021 -1022 self._match(TokenType.ALIAS) -1023 begin = self._match(TokenType.BEGIN) -1024 return_ = self._match_text_seq("RETURN") -1025 expression = self._parse_statement() -1026 -1027 if return_: -1028 expression = self.expression(exp.Return, this=expression) -1029 elif create_token.token_type == TokenType.INDEX: -1030 this = self._parse_index() -1031 elif create_token.token_type in ( -1032 TokenType.TABLE, -1033 TokenType.VIEW, -1034 TokenType.SCHEMA, -1035 ): -1036 table_parts = self._parse_table_parts(schema=True) -1037 -1038 # exp.Properties.Location.POST_NAME -1039 if self._match(TokenType.COMMA): -1040 temp_properties = self._parse_properties(before=True) -1041 if properties and temp_properties: -1042 properties.expressions.extend(temp_properties.expressions) -1043 elif temp_properties: -1044 properties = temp_properties + 997 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: + 998 start = self._prev + 999 temporary = self._match(TokenType.TEMPORARY) +1000 materialized = self._match(TokenType.MATERIALIZED) +1001 kind = self._match_set(self.CREATABLES) and self._prev.text +1002 if not kind: +1003 if default_kind: +1004 kind = default_kind +1005 else: +1006 return self._parse_as_command(start) +1007 +1008 return self.expression( +1009 exp.Drop, +1010 exists=self._parse_exists(), +1011 this=self._parse_table(schema=True), +1012 kind=kind, +1013 temporary=temporary, +1014 materialized=materialized, +1015 cascade=self._match(TokenType.CASCADE), +1016 ) +1017 +1018 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: +1019 return ( +1020 self._match(TokenType.IF) +1021 and (not not_ or self._match(TokenType.NOT)) +1022 and self._match(TokenType.EXISTS) +1023 ) +1024 +1025 def _parse_create(self) -> t.Optional[exp.Expression]: +1026 start = self._prev +1027 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( +1028 TokenType.OR, TokenType.REPLACE +1029 ) +1030 unique = self._match(TokenType.UNIQUE) +1031 volatile = self._match(TokenType.VOLATILE) +1032 +1033 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): +1034 self._match(TokenType.TABLE) +1035 +1036 properties = None +1037 create_token = self._match_set(self.CREATABLES) and self._prev +1038 +1039 if not create_token: +1040 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE +1041 create_token = self._match_set(self.CREATABLES) and self._prev +1042 +1043 if not properties or not create_token: +1044 return self._parse_as_command(start) 1045 -1046 this = self._parse_schema(this=table_parts) -1047 -1048 # exp.Properties.Location.POST_SCHEMA and POST_WITH -1049 temp_properties = self._parse_properties() -1050 if properties and temp_properties: -1051 properties.expressions.extend(temp_properties.expressions) -1052 elif temp_properties: -1053 properties = temp_properties -1054 -1055 self._match(TokenType.ALIAS) -1056 -1057 # exp.Properties.Location.POST_ALIAS -1058 if not ( -1059 self._match(TokenType.SELECT, advance=False) -1060 or self._match(TokenType.WITH, advance=False) -1061 or self._match(TokenType.L_PAREN, advance=False) -1062 ): -1063 temp_properties = self._parse_properties() -1064 if properties and temp_properties: -1065 properties.expressions.extend(temp_properties.expressions) -1066 elif temp_properties: -1067 properties = temp_properties -1068 -1069 expression = self._parse_ddl_select() -1070 -1071 if create_token.token_type == TokenType.TABLE: -1072 # exp.Properties.Location.POST_EXPRESSION -1073 temp_properties = self._parse_properties() -1074 if properties and temp_properties: -1075 properties.expressions.extend(temp_properties.expressions) -1076 elif temp_properties: -1077 properties = temp_properties -1078 -1079 indexes = [] -1080 while True: -1081 index = self._parse_create_table_index() +1046 exists = self._parse_exists(not_=True) +1047 this = None +1048 expression = None +1049 indexes = None +1050 no_schema_binding = None +1051 begin = None +1052 +1053 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): +1054 this = self._parse_user_defined_function(kind=create_token.token_type) +1055 temp_properties = self._parse_properties() +1056 if properties and temp_properties: +1057 properties.expressions.extend(temp_properties.expressions) +1058 elif temp_properties: +1059 properties = temp_properties +1060 +1061 self._match(TokenType.ALIAS) +1062 begin = self._match(TokenType.BEGIN) +1063 return_ = self._match_text_seq("RETURN") +1064 expression = self._parse_statement() +1065 +1066 if return_: +1067 expression = self.expression(exp.Return, this=expression) +1068 elif create_token.token_type == TokenType.INDEX: +1069 this = self._parse_index() +1070 elif create_token.token_type in self.DB_CREATABLES: +1071 table_parts = self._parse_table_parts(schema=True) +1072 +1073 # exp.Properties.Location.POST_NAME +1074 if self._match(TokenType.COMMA): +1075 temp_properties = self._parse_properties(before=True) +1076 if properties and temp_properties: +1077 properties.expressions.extend(temp_properties.expressions) +1078 elif temp_properties: +1079 properties = temp_properties +1080 +1081 this = self._parse_schema(this=table_parts) 1082 -1083 # exp.Properties.Location.POST_INDEX -1084 if self._match(TokenType.PARTITION_BY, advance=False): -1085 temp_properties = self._parse_properties() -1086 if properties and temp_properties: -1087 properties.expressions.extend(temp_properties.expressions) -1088 elif temp_properties: -1089 properties = temp_properties -1090 -1091 if not index: -1092 break -1093 else: -1094 indexes.append(index) -1095 elif create_token.token_type == TokenType.VIEW: -1096 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): -1097 no_schema_binding = True -1098 -1099 return self.expression( -1100 exp.Create, -1101 this=this, -1102 kind=create_token.text, -1103 unique=unique, -1104 expression=expression, -1105 exists=exists, -1106 properties=properties, -1107 replace=replace, -1108 indexes=indexes, -1109 no_schema_binding=no_schema_binding, -1110 begin=begin, -1111 ) -1112 -1113 def _parse_property_before(self) -> t.Optional[exp.Expression]: -1114 self._match(TokenType.COMMA) -1115 -1116 # parsers look to _prev for no/dual/default, so need to consume first -1117 self._match_text_seq("NO") -1118 self._match_text_seq("DUAL") -1119 self._match_text_seq("DEFAULT") -1120 -1121 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): -1122 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) -1123 -1124 return None +1083 # exp.Properties.Location.POST_SCHEMA and POST_WITH +1084 temp_properties = self._parse_properties() +1085 if properties and temp_properties: +1086 properties.expressions.extend(temp_properties.expressions) +1087 elif temp_properties: +1088 properties = temp_properties +1089 +1090 self._match(TokenType.ALIAS) +1091 +1092 # exp.Properties.Location.POST_ALIAS +1093 if not ( +1094 self._match(TokenType.SELECT, advance=False) +1095 or self._match(TokenType.WITH, advance=False) +1096 or self._match(TokenType.L_PAREN, advance=False) +1097 ): +1098 temp_properties = self._parse_properties() +1099 if properties and temp_properties: +1100 properties.expressions.extend(temp_properties.expressions) +1101 elif temp_properties: +1102 properties = temp_properties +1103 +1104 expression = self._parse_ddl_select() +1105 +1106 if create_token.token_type == TokenType.TABLE: +1107 # exp.Properties.Location.POST_EXPRESSION +1108 temp_properties = self._parse_properties() +1109 if properties and temp_properties: +1110 properties.expressions.extend(temp_properties.expressions) +1111 elif temp_properties: +1112 properties = temp_properties +1113 +1114 indexes = [] +1115 while True: +1116 index = self._parse_create_table_index() +1117 +1118 # exp.Properties.Location.POST_INDEX +1119 if self._match(TokenType.PARTITION_BY, advance=False): +1120 temp_properties = self._parse_properties() +1121 if properties and temp_properties: +1122 properties.expressions.extend(temp_properties.expressions) +1123 elif temp_properties: +1124 properties = temp_properties 1125 -1126 def _parse_property(self) -> t.Optional[exp.Expression]: -1127 if self._match_texts(self.PROPERTY_PARSERS): -1128 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) -1129 -1130 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): -1131 return self._parse_character_set(default=True) -1132 -1133 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): -1134 return self._parse_sortkey(compound=True) -1135 -1136 if self._match_text_seq("SQL", "SECURITY"): -1137 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) -1138 -1139 assignment = self._match_pair( -1140 TokenType.VAR, TokenType.EQ, advance=False -1141 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) -1142 -1143 if assignment: -1144 key = self._parse_var_or_string() -1145 self._match(TokenType.EQ) -1146 return self.expression(exp.Property, this=key, value=self._parse_column()) -1147 -1148 return None -1149 -1150 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: -1151 self._match(TokenType.EQ) -1152 self._match(TokenType.ALIAS) -1153 return self.expression( -1154 exp_class, -1155 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), -1156 ) -1157 -1158 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: -1159 properties = [] -1160 -1161 while True: -1162 if before: -1163 identified_property = self._parse_property_before() -1164 else: -1165 identified_property = self._parse_property() -1166 -1167 if not identified_property: -1168 break -1169 for p in ensure_collection(identified_property): -1170 properties.append(p) +1126 if not index: +1127 break +1128 else: +1129 indexes.append(index) +1130 elif create_token.token_type == TokenType.VIEW: +1131 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): +1132 no_schema_binding = True +1133 +1134 return self.expression( +1135 exp.Create, +1136 this=this, +1137 kind=create_token.text, +1138 replace=replace, +1139 unique=unique, +1140 volatile=volatile, +1141 expression=expression, +1142 exists=exists, +1143 properties=properties, +1144 indexes=indexes, +1145 no_schema_binding=no_schema_binding, +1146 begin=begin, +1147 ) +1148 +1149 def _parse_property_before(self) -> t.Optional[exp.Expression]: +1150 self._match(TokenType.COMMA) +1151 +1152 # parsers look to _prev for no/dual/default, so need to consume first +1153 self._match_text_seq("NO") +1154 self._match_text_seq("DUAL") +1155 self._match_text_seq("DEFAULT") +1156 +1157 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): +1158 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) +1159 +1160 return None +1161 +1162 def _parse_property(self) -> t.Optional[exp.Expression]: +1163 if self._match_texts(self.PROPERTY_PARSERS): +1164 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) +1165 +1166 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): +1167 return self._parse_character_set(default=True) +1168 +1169 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): +1170 return self._parse_sortkey(compound=True) 1171 -1172 if properties: -1173 return self.expression(exp.Properties, expressions=properties) +1172 if self._match_text_seq("SQL", "SECURITY"): +1173 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1174 -1175 return None -1176 -1177 def _parse_fallback(self, no=False) -> exp.Expression: -1178 self._match_text_seq("FALLBACK") -1179 return self.expression( -1180 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") -1181 ) -1182 -1183 def _parse_with_property( -1184 self, -1185 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: -1186 self._match(TokenType.WITH) -1187 if self._match(TokenType.L_PAREN, advance=False): -1188 return self._parse_wrapped_csv(self._parse_property) -1189 -1190 if self._match_text_seq("JOURNAL"): -1191 return self._parse_withjournaltable() -1192 -1193 if self._match_text_seq("DATA"): -1194 return self._parse_withdata(no=False) -1195 elif self._match_text_seq("NO", "DATA"): -1196 return self._parse_withdata(no=True) -1197 -1198 if not self._next: -1199 return None -1200 -1201 return self._parse_withisolatedloading() +1175 assignment = self._match_pair( +1176 TokenType.VAR, TokenType.EQ, advance=False +1177 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) +1178 +1179 if assignment: +1180 key = self._parse_var_or_string() +1181 self._match(TokenType.EQ) +1182 return self.expression(exp.Property, this=key, value=self._parse_column()) +1183 +1184 return None +1185 +1186 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: +1187 self._match(TokenType.EQ) +1188 self._match(TokenType.ALIAS) +1189 return self.expression( +1190 exp_class, +1191 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), +1192 ) +1193 +1194 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: +1195 properties = [] +1196 +1197 while True: +1198 if before: +1199 identified_property = self._parse_property_before() +1200 else: +1201 identified_property = self._parse_property() 1202 -1203 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html -1204 def _parse_definer(self) -> t.Optional[exp.Expression]: -1205 self._match(TokenType.EQ) -1206 -1207 user = self._parse_id_var() -1208 self._match(TokenType.PARAMETER) -1209 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) +1203 if not identified_property: +1204 break +1205 for p in ensure_collection(identified_property): +1206 properties.append(p) +1207 +1208 if properties: +1209 return self.expression(exp.Properties, expressions=properties) 1210 -1211 if not user or not host: -1212 return None -1213 -1214 return exp.DefinerProperty(this=f"{user}@{host}") -1215 -1216 def _parse_withjournaltable(self) -> exp.Expression: -1217 self._match(TokenType.TABLE) -1218 self._match(TokenType.EQ) -1219 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) -1220 -1221 def _parse_log(self, no=False) -> exp.Expression: -1222 self._match_text_seq("LOG") -1223 return self.expression(exp.LogProperty, no=no) -1224 -1225 def _parse_journal(self, no=False, dual=False) -> exp.Expression: -1226 before = self._match_text_seq("BEFORE") -1227 self._match_text_seq("JOURNAL") -1228 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) -1229 -1230 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: -1231 self._match_text_seq("NOT") -1232 self._match_text_seq("LOCAL") -1233 self._match_text_seq("AFTER", "JOURNAL") -1234 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) -1235 -1236 def _parse_checksum(self) -> exp.Expression: -1237 self._match_text_seq("CHECKSUM") -1238 self._match(TokenType.EQ) -1239 -1240 on = None -1241 if self._match(TokenType.ON): -1242 on = True -1243 elif self._match_text_seq("OFF"): -1244 on = False -1245 default = self._match(TokenType.DEFAULT) +1211 return None +1212 +1213 def _parse_fallback(self, no=False) -> exp.Expression: +1214 self._match_text_seq("FALLBACK") +1215 return self.expression( +1216 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") +1217 ) +1218 +1219 def _parse_with_property( +1220 self, +1221 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: +1222 self._match(TokenType.WITH) +1223 if self._match(TokenType.L_PAREN, advance=False): +1224 return self._parse_wrapped_csv(self._parse_property) +1225 +1226 if self._match_text_seq("JOURNAL"): +1227 return self._parse_withjournaltable() +1228 +1229 if self._match_text_seq("DATA"): +1230 return self._parse_withdata(no=False) +1231 elif self._match_text_seq("NO", "DATA"): +1232 return self._parse_withdata(no=True) +1233 +1234 if not self._next: +1235 return None +1236 +1237 return self._parse_withisolatedloading() +1238 +1239 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html +1240 def _parse_definer(self) -> t.Optional[exp.Expression]: +1241 self._match(TokenType.EQ) +1242 +1243 user = self._parse_id_var() +1244 self._match(TokenType.PARAMETER) +1245 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1246 -1247 return self.expression( -1248 exp.ChecksumProperty, -1249 on=on, -1250 default=default, -1251 ) -1252 -1253 def _parse_freespace(self) -> exp.Expression: -1254 self._match_text_seq("FREESPACE") -1255 self._match(TokenType.EQ) -1256 return self.expression( -1257 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) -1258 ) -1259 -1260 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: -1261 self._match_text_seq("MERGEBLOCKRATIO") -1262 if self._match(TokenType.EQ): -1263 return self.expression( -1264 exp.MergeBlockRatioProperty, -1265 this=self._parse_number(), -1266 percent=self._match(TokenType.PERCENT), -1267 ) -1268 else: -1269 return self.expression( -1270 exp.MergeBlockRatioProperty, -1271 no=no, -1272 default=default, -1273 ) -1274 -1275 def _parse_datablocksize(self, default=None) -> exp.Expression: -1276 if default: -1277 self._match_text_seq("DATABLOCKSIZE") -1278 return self.expression(exp.DataBlocksizeProperty, default=True) -1279 elif self._match_texts(("MIN", "MINIMUM")): -1280 self._match_text_seq("DATABLOCKSIZE") -1281 return self.expression(exp.DataBlocksizeProperty, min=True) -1282 elif self._match_texts(("MAX", "MAXIMUM")): -1283 self._match_text_seq("DATABLOCKSIZE") -1284 return self.expression(exp.DataBlocksizeProperty, min=False) -1285 -1286 self._match_text_seq("DATABLOCKSIZE") -1287 self._match(TokenType.EQ) -1288 size = self._parse_number() -1289 units = None -1290 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): -1291 units = self._prev.text -1292 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) -1293 -1294 def _parse_blockcompression(self) -> exp.Expression: -1295 self._match_text_seq("BLOCKCOMPRESSION") -1296 self._match(TokenType.EQ) -1297 always = self._match_text_seq("ALWAYS") -1298 manual = self._match_text_seq("MANUAL") -1299 never = self._match_text_seq("NEVER") -1300 default = self._match_text_seq("DEFAULT") -1301 autotemp = None -1302 if self._match_text_seq("AUTOTEMP"): -1303 autotemp = self._parse_schema() -1304 -1305 return self.expression( -1306 exp.BlockCompressionProperty, -1307 always=always, -1308 manual=manual, -1309 never=never, -1310 default=default, -1311 autotemp=autotemp, -1312 ) -1313 -1314 def _parse_withisolatedloading(self) -> exp.Expression: -1315 no = self._match_text_seq("NO") -1316 concurrent = self._match_text_seq("CONCURRENT") -1317 self._match_text_seq("ISOLATED", "LOADING") -1318 for_all = self._match_text_seq("FOR", "ALL") -1319 for_insert = self._match_text_seq("FOR", "INSERT") -1320 for_none = self._match_text_seq("FOR", "NONE") -1321 return self.expression( -1322 exp.IsolatedLoadingProperty, -1323 no=no, -1324 concurrent=concurrent, -1325 for_all=for_all, -1326 for_insert=for_insert, -1327 for_none=for_none, -1328 ) +1247 if not user or not host: +1248 return None +1249 +1250 return exp.DefinerProperty(this=f"{user}@{host}") +1251 +1252 def _parse_withjournaltable(self) -> exp.Expression: +1253 self._match(TokenType.TABLE) +1254 self._match(TokenType.EQ) +1255 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) +1256 +1257 def _parse_log(self, no=False) -> exp.Expression: +1258 self._match_text_seq("LOG") +1259 return self.expression(exp.LogProperty, no=no) +1260 +1261 def _parse_journal(self, no=False, dual=False) -> exp.Expression: +1262 before = self._match_text_seq("BEFORE") +1263 self._match_text_seq("JOURNAL") +1264 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) +1265 +1266 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: +1267 self._match_text_seq("NOT") +1268 self._match_text_seq("LOCAL") +1269 self._match_text_seq("AFTER", "JOURNAL") +1270 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) +1271 +1272 def _parse_checksum(self) -> exp.Expression: +1273 self._match_text_seq("CHECKSUM") +1274 self._match(TokenType.EQ) +1275 +1276 on = None +1277 if self._match(TokenType.ON): +1278 on = True +1279 elif self._match_text_seq("OFF"): +1280 on = False +1281 default = self._match(TokenType.DEFAULT) +1282 +1283 return self.expression( +1284 exp.ChecksumProperty, +1285 on=on, +1286 default=default, +1287 ) +1288 +1289 def _parse_freespace(self) -> exp.Expression: +1290 self._match_text_seq("FREESPACE") +1291 self._match(TokenType.EQ) +1292 return self.expression( +1293 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) +1294 ) +1295 +1296 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: +1297 self._match_text_seq("MERGEBLOCKRATIO") +1298 if self._match(TokenType.EQ): +1299 return self.expression( +1300 exp.MergeBlockRatioProperty, +1301 this=self._parse_number(), +1302 percent=self._match(TokenType.PERCENT), +1303 ) +1304 else: +1305 return self.expression( +1306 exp.MergeBlockRatioProperty, +1307 no=no, +1308 default=default, +1309 ) +1310 +1311 def _parse_datablocksize(self, default=None) -> exp.Expression: +1312 if default: +1313 self._match_text_seq("DATABLOCKSIZE") +1314 return self.expression(exp.DataBlocksizeProperty, default=True) +1315 elif self._match_texts(("MIN", "MINIMUM")): +1316 self._match_text_seq("DATABLOCKSIZE") +1317 return self.expression(exp.DataBlocksizeProperty, min=True) +1318 elif self._match_texts(("MAX", "MAXIMUM")): +1319 self._match_text_seq("DATABLOCKSIZE") +1320 return self.expression(exp.DataBlocksizeProperty, min=False) +1321 +1322 self._match_text_seq("DATABLOCKSIZE") +1323 self._match(TokenType.EQ) +1324 size = self._parse_number() +1325 units = None +1326 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): +1327 units = self._prev.text +1328 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1329 -1330 def _parse_locking(self) -> exp.Expression: -1331 if self._match(TokenType.TABLE): -1332 kind = "TABLE" -1333 elif self._match(TokenType.VIEW): -1334 kind = "VIEW" -1335 elif self._match(TokenType.ROW): -1336 kind = "ROW" -1337 elif self._match_text_seq("DATABASE"): -1338 kind = "DATABASE" -1339 else: -1340 kind = None -1341 -1342 if kind in ("DATABASE", "TABLE", "VIEW"): -1343 this = self._parse_table_parts() -1344 else: -1345 this = None -1346 -1347 if self._match(TokenType.FOR): -1348 for_or_in = "FOR" -1349 elif self._match(TokenType.IN): -1350 for_or_in = "IN" -1351 else: -1352 for_or_in = None -1353 -1354 if self._match_text_seq("ACCESS"): -1355 lock_type = "ACCESS" -1356 elif self._match_texts(("EXCL", "EXCLUSIVE")): -1357 lock_type = "EXCLUSIVE" -1358 elif self._match_text_seq("SHARE"): -1359 lock_type = "SHARE" -1360 elif self._match_text_seq("READ"): -1361 lock_type = "READ" -1362 elif self._match_text_seq("WRITE"): -1363 lock_type = "WRITE" -1364 elif self._match_text_seq("CHECKSUM"): -1365 lock_type = "CHECKSUM" -1366 else: -1367 lock_type = None -1368 -1369 override = self._match_text_seq("OVERRIDE") -1370 -1371 return self.expression( -1372 exp.LockingProperty, -1373 this=this, -1374 kind=kind, -1375 for_or_in=for_or_in, -1376 lock_type=lock_type, -1377 override=override, -1378 ) -1379 -1380 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: -1381 if self._match(TokenType.PARTITION_BY): -1382 return self._parse_csv(self._parse_conjunction) -1383 return [] -1384 -1385 def _parse_partitioned_by(self) -> exp.Expression: -1386 self._match(TokenType.EQ) -1387 return self.expression( -1388 exp.PartitionedByProperty, -1389 this=self._parse_schema() or self._parse_bracket(self._parse_field()), -1390 ) -1391 -1392 def _parse_withdata(self, no=False) -> exp.Expression: -1393 if self._match_text_seq("AND", "STATISTICS"): -1394 statistics = True -1395 elif self._match_text_seq("AND", "NO", "STATISTICS"): -1396 statistics = False -1397 else: -1398 statistics = None -1399 -1400 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) -1401 -1402 def _parse_noprimaryindex(self) -> exp.Expression: -1403 self._match_text_seq("PRIMARY", "INDEX") -1404 return exp.NoPrimaryIndexProperty() -1405 -1406 def _parse_oncommit(self) -> exp.Expression: -1407 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") -1408 return exp.OnCommitProperty() -1409 -1410 def _parse_distkey(self) -> exp.Expression: -1411 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) -1412 -1413 def _parse_create_like(self) -> t.Optional[exp.Expression]: -1414 table = self._parse_table(schema=True) -1415 options = [] -1416 while self._match_texts(("INCLUDING", "EXCLUDING")): -1417 this = self._prev.text.upper() -1418 id_var = self._parse_id_var() -1419 -1420 if not id_var: -1421 return None -1422 -1423 options.append( -1424 self.expression( -1425 exp.Property, -1426 this=this, -1427 value=exp.Var(this=id_var.this.upper()), -1428 ) -1429 ) -1430 return self.expression(exp.LikeProperty, this=table, expressions=options) -1431 -1432 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: -1433 return self.expression( -1434 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound -1435 ) -1436 -1437 def _parse_character_set(self, default: bool = False) -> exp.Expression: -1438 self._match(TokenType.EQ) -1439 return self.expression( -1440 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default -1441 ) -1442 -1443 def _parse_returns(self) -> exp.Expression: -1444 value: t.Optional[exp.Expression] -1445 is_table = self._match(TokenType.TABLE) -1446 -1447 if is_table: -1448 if self._match(TokenType.LT): -1449 value = self.expression( -1450 exp.Schema, -1451 this="TABLE", -1452 expressions=self._parse_csv(self._parse_struct_kwargs), -1453 ) -1454 if not self._match(TokenType.GT): -1455 self.raise_error("Expecting >") -1456 else: -1457 value = self._parse_schema(exp.Var(this="TABLE")) -1458 else: -1459 value = self._parse_types() -1460 -1461 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) -1462 -1463 def _parse_temporary(self, global_=False) -> exp.Expression: -1464 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" -1465 return self.expression(exp.TemporaryProperty, global_=global_) -1466 -1467 def _parse_describe(self) -> exp.Expression: -1468 kind = self._match_set(self.CREATABLES) and self._prev.text -1469 this = self._parse_table() -1470 -1471 return self.expression(exp.Describe, this=this, kind=kind) +1330 def _parse_blockcompression(self) -> exp.Expression: +1331 self._match_text_seq("BLOCKCOMPRESSION") +1332 self._match(TokenType.EQ) +1333 always = self._match_text_seq("ALWAYS") +1334 manual = self._match_text_seq("MANUAL") +1335 never = self._match_text_seq("NEVER") +1336 default = self._match_text_seq("DEFAULT") +1337 autotemp = None +1338 if self._match_text_seq("AUTOTEMP"): +1339 autotemp = self._parse_schema() +1340 +1341 return self.expression( +1342 exp.BlockCompressionProperty, +1343 always=always, +1344 manual=manual, +1345 never=never, +1346 default=default, +1347 autotemp=autotemp, +1348 ) +1349 +1350 def _parse_withisolatedloading(self) -> exp.Expression: +1351 no = self._match_text_seq("NO") +1352 concurrent = self._match_text_seq("CONCURRENT") +1353 self._match_text_seq("ISOLATED", "LOADING") +1354 for_all = self._match_text_seq("FOR", "ALL") +1355 for_insert = self._match_text_seq("FOR", "INSERT") +1356 for_none = self._match_text_seq("FOR", "NONE") +1357 return self.expression( +1358 exp.IsolatedLoadingProperty, +1359 no=no, +1360 concurrent=concurrent, +1361 for_all=for_all, +1362 for_insert=for_insert, +1363 for_none=for_none, +1364 ) +1365 +1366 def _parse_locking(self) -> exp.Expression: +1367 if self._match(TokenType.TABLE): +1368 kind = "TABLE" +1369 elif self._match(TokenType.VIEW): +1370 kind = "VIEW" +1371 elif self._match(TokenType.ROW): +1372 kind = "ROW" +1373 elif self._match_text_seq("DATABASE"): +1374 kind = "DATABASE" +1375 else: +1376 kind = None +1377 +1378 if kind in ("DATABASE", "TABLE", "VIEW"): +1379 this = self._parse_table_parts() +1380 else: +1381 this = None +1382 +1383 if self._match(TokenType.FOR): +1384 for_or_in = "FOR" +1385 elif self._match(TokenType.IN): +1386 for_or_in = "IN" +1387 else: +1388 for_or_in = None +1389 +1390 if self._match_text_seq("ACCESS"): +1391 lock_type = "ACCESS" +1392 elif self._match_texts(("EXCL", "EXCLUSIVE")): +1393 lock_type = "EXCLUSIVE" +1394 elif self._match_text_seq("SHARE"): +1395 lock_type = "SHARE" +1396 elif self._match_text_seq("READ"): +1397 lock_type = "READ" +1398 elif self._match_text_seq("WRITE"): +1399 lock_type = "WRITE" +1400 elif self._match_text_seq("CHECKSUM"): +1401 lock_type = "CHECKSUM" +1402 else: +1403 lock_type = None +1404 +1405 override = self._match_text_seq("OVERRIDE") +1406 +1407 return self.expression( +1408 exp.LockingProperty, +1409 this=this, +1410 kind=kind, +1411 for_or_in=for_or_in, +1412 lock_type=lock_type, +1413 override=override, +1414 ) +1415 +1416 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: +1417 if self._match(TokenType.PARTITION_BY): +1418 return self._parse_csv(self._parse_conjunction) +1419 return [] +1420 +1421 def _parse_partitioned_by(self) -> exp.Expression: +1422 self._match(TokenType.EQ) +1423 return self.expression( +1424 exp.PartitionedByProperty, +1425 this=self._parse_schema() or self._parse_bracket(self._parse_field()), +1426 ) +1427 +1428 def _parse_withdata(self, no=False) -> exp.Expression: +1429 if self._match_text_seq("AND", "STATISTICS"): +1430 statistics = True +1431 elif self._match_text_seq("AND", "NO", "STATISTICS"): +1432 statistics = False +1433 else: +1434 statistics = None +1435 +1436 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) +1437 +1438 def _parse_noprimaryindex(self) -> exp.Expression: +1439 self._match_text_seq("PRIMARY", "INDEX") +1440 return exp.NoPrimaryIndexProperty() +1441 +1442 def _parse_oncommit(self) -> exp.Expression: +1443 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") +1444 return exp.OnCommitProperty() +1445 +1446 def _parse_distkey(self) -> exp.Expression: +1447 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) +1448 +1449 def _parse_create_like(self) -> t.Optional[exp.Expression]: +1450 table = self._parse_table(schema=True) +1451 options = [] +1452 while self._match_texts(("INCLUDING", "EXCLUDING")): +1453 this = self._prev.text.upper() +1454 id_var = self._parse_id_var() +1455 +1456 if not id_var: +1457 return None +1458 +1459 options.append( +1460 self.expression( +1461 exp.Property, +1462 this=this, +1463 value=exp.Var(this=id_var.this.upper()), +1464 ) +1465 ) +1466 return self.expression(exp.LikeProperty, this=table, expressions=options) +1467 +1468 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: +1469 return self.expression( +1470 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound +1471 ) 1472 -1473 def _parse_insert(self) -> exp.Expression: -1474 overwrite = self._match(TokenType.OVERWRITE) -1475 local = self._match(TokenType.LOCAL) -1476 -1477 this: t.Optional[exp.Expression] +1473 def _parse_character_set(self, default: bool = False) -> exp.Expression: +1474 self._match(TokenType.EQ) +1475 return self.expression( +1476 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default +1477 ) 1478 -1479 alternative = None -1480 if self._match_text_seq("DIRECTORY"): -1481 this = self.expression( -1482 exp.Directory, -1483 this=self._parse_var_or_string(), -1484 local=local, -1485 row_format=self._parse_row_format(match_row=True), -1486 ) -1487 else: -1488 if self._match(TokenType.OR): -1489 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text -1490 -1491 self._match(TokenType.INTO) -1492 self._match(TokenType.TABLE) -1493 this = self._parse_table(schema=True) -1494 -1495 return self.expression( -1496 exp.Insert, -1497 this=this, -1498 exists=self._parse_exists(), -1499 partition=self._parse_partition(), -1500 expression=self._parse_ddl_select(), -1501 overwrite=overwrite, -1502 alternative=alternative, -1503 ) -1504 -1505 def _parse_row(self) -> t.Optional[exp.Expression]: -1506 if not self._match(TokenType.FORMAT): -1507 return None -1508 return self._parse_row_format() -1509 -1510 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: -1511 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): -1512 return None -1513 -1514 if self._match_text_seq("SERDE"): -1515 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) -1516 -1517 self._match_text_seq("DELIMITED") -1518 -1519 kwargs = {} -1520 -1521 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): -1522 kwargs["fields"] = self._parse_string() -1523 if self._match_text_seq("ESCAPED", "BY"): -1524 kwargs["escaped"] = self._parse_string() -1525 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): -1526 kwargs["collection_items"] = self._parse_string() -1527 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): -1528 kwargs["map_keys"] = self._parse_string() -1529 if self._match_text_seq("LINES", "TERMINATED", "BY"): -1530 kwargs["lines"] = self._parse_string() -1531 if self._match_text_seq("NULL", "DEFINED", "AS"): -1532 kwargs["null"] = self._parse_string() -1533 -1534 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore -1535 -1536 def _parse_load_data(self) -> exp.Expression: -1537 local = self._match(TokenType.LOCAL) -1538 self._match_text_seq("INPATH") -1539 inpath = self._parse_string() -1540 overwrite = self._match(TokenType.OVERWRITE) -1541 self._match_pair(TokenType.INTO, TokenType.TABLE) -1542 -1543 return self.expression( -1544 exp.LoadData, -1545 this=self._parse_table(schema=True), -1546 local=local, -1547 overwrite=overwrite, -1548 inpath=inpath, -1549 partition=self._parse_partition(), -1550 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), -1551 serde=self._match_text_seq("SERDE") and self._parse_string(), -1552 ) -1553 -1554 def _parse_delete(self) -> exp.Expression: -1555 self._match(TokenType.FROM) +1479 def _parse_returns(self) -> exp.Expression: +1480 value: t.Optional[exp.Expression] +1481 is_table = self._match(TokenType.TABLE) +1482 +1483 if is_table: +1484 if self._match(TokenType.LT): +1485 value = self.expression( +1486 exp.Schema, +1487 this="TABLE", +1488 expressions=self._parse_csv(self._parse_struct_kwargs), +1489 ) +1490 if not self._match(TokenType.GT): +1491 self.raise_error("Expecting >") +1492 else: +1493 value = self._parse_schema(exp.Var(this="TABLE")) +1494 else: +1495 value = self._parse_types() +1496 +1497 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) +1498 +1499 def _parse_temporary(self, global_=False) -> exp.Expression: +1500 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" +1501 return self.expression(exp.TemporaryProperty, global_=global_) +1502 +1503 def _parse_describe(self) -> exp.Expression: +1504 kind = self._match_set(self.CREATABLES) and self._prev.text +1505 this = self._parse_table() +1506 +1507 return self.expression(exp.Describe, this=this, kind=kind) +1508 +1509 def _parse_insert(self) -> exp.Expression: +1510 overwrite = self._match(TokenType.OVERWRITE) +1511 local = self._match(TokenType.LOCAL) +1512 +1513 this: t.Optional[exp.Expression] +1514 +1515 alternative = None +1516 if self._match_text_seq("DIRECTORY"): +1517 this = self.expression( +1518 exp.Directory, +1519 this=self._parse_var_or_string(), +1520 local=local, +1521 row_format=self._parse_row_format(match_row=True), +1522 ) +1523 else: +1524 if self._match(TokenType.OR): +1525 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text +1526 +1527 self._match(TokenType.INTO) +1528 self._match(TokenType.TABLE) +1529 this = self._parse_table(schema=True) +1530 +1531 return self.expression( +1532 exp.Insert, +1533 this=this, +1534 exists=self._parse_exists(), +1535 partition=self._parse_partition(), +1536 expression=self._parse_ddl_select(), +1537 overwrite=overwrite, +1538 alternative=alternative, +1539 ) +1540 +1541 def _parse_row(self) -> t.Optional[exp.Expression]: +1542 if not self._match(TokenType.FORMAT): +1543 return None +1544 return self._parse_row_format() +1545 +1546 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: +1547 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): +1548 return None +1549 +1550 if self._match_text_seq("SERDE"): +1551 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) +1552 +1553 self._match_text_seq("DELIMITED") +1554 +1555 kwargs = {} 1556 -1557 return self.expression( -1558 exp.Delete, -1559 this=self._parse_table(schema=True), -1560 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), -1561 where=self._parse_where(), -1562 ) -1563 -1564 def _parse_update(self) -> exp.Expression: -1565 return self.expression( -1566 exp.Update, -1567 **{ # type: ignore -1568 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), -1569 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), -1570 "from": self._parse_from(), -1571 "where": self._parse_where(), -1572 }, -1573 ) -1574 -1575 def _parse_uncache(self) -> exp.Expression: -1576 if not self._match(TokenType.TABLE): -1577 self.raise_error("Expecting TABLE after UNCACHE") +1557 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): +1558 kwargs["fields"] = self._parse_string() +1559 if self._match_text_seq("ESCAPED", "BY"): +1560 kwargs["escaped"] = self._parse_string() +1561 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): +1562 kwargs["collection_items"] = self._parse_string() +1563 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): +1564 kwargs["map_keys"] = self._parse_string() +1565 if self._match_text_seq("LINES", "TERMINATED", "BY"): +1566 kwargs["lines"] = self._parse_string() +1567 if self._match_text_seq("NULL", "DEFINED", "AS"): +1568 kwargs["null"] = self._parse_string() +1569 +1570 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore +1571 +1572 def _parse_load_data(self) -> exp.Expression: +1573 local = self._match(TokenType.LOCAL) +1574 self._match_text_seq("INPATH") +1575 inpath = self._parse_string() +1576 overwrite = self._match(TokenType.OVERWRITE) +1577 self._match_pair(TokenType.INTO, TokenType.TABLE) 1578 1579 return self.expression( -1580 exp.Uncache, -1581 exists=self._parse_exists(), -1582 this=self._parse_table(schema=True), -1583 ) -1584 -1585 def _parse_cache(self) -> exp.Expression: -1586 lazy = self._match(TokenType.LAZY) -1587 self._match(TokenType.TABLE) -1588 table = self._parse_table(schema=True) -1589 options = [] -1590 -1591 if self._match(TokenType.OPTIONS): -1592 self._match_l_paren() -1593 k = self._parse_string() -1594 self._match(TokenType.EQ) -1595 v = self._parse_string() -1596 options = [k, v] -1597 self._match_r_paren() -1598 -1599 self._match(TokenType.ALIAS) -1600 return self.expression( -1601 exp.Cache, -1602 this=table, -1603 lazy=lazy, -1604 options=options, -1605 expression=self._parse_select(nested=True), -1606 ) -1607 -1608 def _parse_partition(self) -> t.Optional[exp.Expression]: -1609 if not self._match(TokenType.PARTITION): -1610 return None -1611 -1612 return self.expression( -1613 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) -1614 ) -1615 -1616 def _parse_value(self) -> exp.Expression: -1617 if self._match(TokenType.L_PAREN): -1618 expressions = self._parse_csv(self._parse_conjunction) -1619 self._match_r_paren() -1620 return self.expression(exp.Tuple, expressions=expressions) -1621 -1622 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. -1623 # Source: https://prestodb.io/docs/current/sql/values.html -1624 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) -1625 -1626 def _parse_select( -1627 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True -1628 ) -> t.Optional[exp.Expression]: -1629 cte = self._parse_with() -1630 if cte: -1631 this = self._parse_statement() -1632 -1633 if not this: -1634 self.raise_error("Failed to parse any statement following CTE") -1635 return cte -1636 -1637 if "with" in this.arg_types: -1638 this.set("with", cte) -1639 else: -1640 self.raise_error(f"{this.key} does not support CTE") -1641 this = cte -1642 elif self._match(TokenType.SELECT): -1643 comments = self._prev_comments -1644 -1645 hint = self._parse_hint() -1646 all_ = self._match(TokenType.ALL) -1647 distinct = self._match(TokenType.DISTINCT) -1648 -1649 if distinct: -1650 distinct = self.expression( -1651 exp.Distinct, -1652 on=self._parse_value() if self._match(TokenType.ON) else None, -1653 ) -1654 -1655 if all_ and distinct: -1656 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") +1580 exp.LoadData, +1581 this=self._parse_table(schema=True), +1582 local=local, +1583 overwrite=overwrite, +1584 inpath=inpath, +1585 partition=self._parse_partition(), +1586 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), +1587 serde=self._match_text_seq("SERDE") and self._parse_string(), +1588 ) +1589 +1590 def _parse_delete(self) -> exp.Expression: +1591 self._match(TokenType.FROM) +1592 +1593 return self.expression( +1594 exp.Delete, +1595 this=self._parse_table(schema=True), +1596 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), +1597 where=self._parse_where(), +1598 ) +1599 +1600 def _parse_update(self) -> exp.Expression: +1601 return self.expression( +1602 exp.Update, +1603 **{ # type: ignore +1604 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), +1605 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), +1606 "from": self._parse_from(), +1607 "where": self._parse_where(), +1608 }, +1609 ) +1610 +1611 def _parse_uncache(self) -> exp.Expression: +1612 if not self._match(TokenType.TABLE): +1613 self.raise_error("Expecting TABLE after UNCACHE") +1614 +1615 return self.expression( +1616 exp.Uncache, +1617 exists=self._parse_exists(), +1618 this=self._parse_table(schema=True), +1619 ) +1620 +1621 def _parse_cache(self) -> exp.Expression: +1622 lazy = self._match(TokenType.LAZY) +1623 self._match(TokenType.TABLE) +1624 table = self._parse_table(schema=True) +1625 options = [] +1626 +1627 if self._match(TokenType.OPTIONS): +1628 self._match_l_paren() +1629 k = self._parse_string() +1630 self._match(TokenType.EQ) +1631 v = self._parse_string() +1632 options = [k, v] +1633 self._match_r_paren() +1634 +1635 self._match(TokenType.ALIAS) +1636 return self.expression( +1637 exp.Cache, +1638 this=table, +1639 lazy=lazy, +1640 options=options, +1641 expression=self._parse_select(nested=True), +1642 ) +1643 +1644 def _parse_partition(self) -> t.Optional[exp.Expression]: +1645 if not self._match(TokenType.PARTITION): +1646 return None +1647 +1648 return self.expression( +1649 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) +1650 ) +1651 +1652 def _parse_value(self) -> exp.Expression: +1653 if self._match(TokenType.L_PAREN): +1654 expressions = self._parse_csv(self._parse_conjunction) +1655 self._match_r_paren() +1656 return self.expression(exp.Tuple, expressions=expressions) 1657 -1658 limit = self._parse_limit(top=True) -1659 expressions = self._parse_csv(self._parse_expression) -1660 -1661 this = self.expression( -1662 exp.Select, -1663 hint=hint, -1664 distinct=distinct, -1665 expressions=expressions, -1666 limit=limit, -1667 ) -1668 this.comments = comments -1669 -1670 into = self._parse_into() -1671 if into: -1672 this.set("into", into) -1673 -1674 from_ = self._parse_from() -1675 if from_: -1676 this.set("from", from_) -1677 -1678 self._parse_query_modifiers(this) -1679 elif (table or nested) and self._match(TokenType.L_PAREN): -1680 this = self._parse_table() if table else self._parse_select(nested=True) -1681 self._parse_query_modifiers(this) -1682 this = self._parse_set_operations(this) -1683 self._match_r_paren() +1658 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. +1659 # Source: https://prestodb.io/docs/current/sql/values.html +1660 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) +1661 +1662 def _parse_select( +1663 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True +1664 ) -> t.Optional[exp.Expression]: +1665 cte = self._parse_with() +1666 if cte: +1667 this = self._parse_statement() +1668 +1669 if not this: +1670 self.raise_error("Failed to parse any statement following CTE") +1671 return cte +1672 +1673 if "with" in this.arg_types: +1674 this.set("with", cte) +1675 else: +1676 self.raise_error(f"{this.key} does not support CTE") +1677 this = cte +1678 elif self._match(TokenType.SELECT): +1679 comments = self._prev_comments +1680 +1681 hint = self._parse_hint() +1682 all_ = self._match(TokenType.ALL) +1683 distinct = self._match(TokenType.DISTINCT) 1684 -1685 # early return so that subquery unions aren't parsed again -1686 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 -1687 # Union ALL should be a property of the top select node, not the subquery -1688 return self._parse_subquery(this, parse_alias=parse_subquery_alias) -1689 elif self._match(TokenType.VALUES): -1690 this = self.expression( -1691 exp.Values, -1692 expressions=self._parse_csv(self._parse_value), -1693 alias=self._parse_table_alias(), -1694 ) -1695 else: -1696 this = None -1697 -1698 return self._parse_set_operations(this) -1699 -1700 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: -1701 if not skip_with_token and not self._match(TokenType.WITH): -1702 return None -1703 -1704 recursive = self._match(TokenType.RECURSIVE) +1685 if distinct: +1686 distinct = self.expression( +1687 exp.Distinct, +1688 on=self._parse_value() if self._match(TokenType.ON) else None, +1689 ) +1690 +1691 if all_ and distinct: +1692 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") +1693 +1694 limit = self._parse_limit(top=True) +1695 expressions = self._parse_csv(self._parse_expression) +1696 +1697 this = self.expression( +1698 exp.Select, +1699 hint=hint, +1700 distinct=distinct, +1701 expressions=expressions, +1702 limit=limit, +1703 ) +1704 this.comments = comments 1705 -1706 expressions = [] -1707 while True: -1708 expressions.append(self._parse_cte()) +1706 into = self._parse_into() +1707 if into: +1708 this.set("into", into) 1709 -1710 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): -1711 break -1712 else: -1713 self._match(TokenType.WITH) -1714 -1715 return self.expression(exp.With, expressions=expressions, recursive=recursive) -1716 -1717 def _parse_cte(self) -> exp.Expression: -1718 alias = self._parse_table_alias() -1719 if not alias or not alias.this: -1720 self.raise_error("Expected CTE to have alias") -1721 -1722 self._match(TokenType.ALIAS) -1723 -1724 return self.expression( -1725 exp.CTE, -1726 this=self._parse_wrapped(self._parse_statement), -1727 alias=alias, -1728 ) -1729 -1730 def _parse_table_alias( -1731 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None -1732 ) -> t.Optional[exp.Expression]: -1733 any_token = self._match(TokenType.ALIAS) -1734 alias = self._parse_id_var( -1735 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS -1736 ) -1737 index = self._index -1738 -1739 if self._match(TokenType.L_PAREN): -1740 columns = self._parse_csv(self._parse_function_parameter) -1741 self._match_r_paren() if columns else self._retreat(index) -1742 else: -1743 columns = None -1744 -1745 if not alias and not columns: -1746 return None -1747 -1748 return self.expression(exp.TableAlias, this=alias, columns=columns) -1749 -1750 def _parse_subquery( -1751 self, this: t.Optional[exp.Expression], parse_alias: bool = True -1752 ) -> exp.Expression: -1753 return self.expression( -1754 exp.Subquery, -1755 this=this, -1756 pivots=self._parse_pivots(), -1757 alias=self._parse_table_alias() if parse_alias else None, -1758 ) +1710 from_ = self._parse_from() +1711 if from_: +1712 this.set("from", from_) +1713 +1714 self._parse_query_modifiers(this) +1715 elif (table or nested) and self._match(TokenType.L_PAREN): +1716 this = self._parse_table() if table else self._parse_select(nested=True) +1717 self._parse_query_modifiers(this) +1718 this = self._parse_set_operations(this) +1719 self._match_r_paren() +1720 +1721 # early return so that subquery unions aren't parsed again +1722 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 +1723 # Union ALL should be a property of the top select node, not the subquery +1724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) +1725 elif self._match(TokenType.VALUES): +1726 this = self.expression( +1727 exp.Values, +1728 expressions=self._parse_csv(self._parse_value), +1729 alias=self._parse_table_alias(), +1730 ) +1731 else: +1732 this = None +1733 +1734 return self._parse_set_operations(this) +1735 +1736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: +1737 if not skip_with_token and not self._match(TokenType.WITH): +1738 return None +1739 +1740 recursive = self._match(TokenType.RECURSIVE) +1741 +1742 expressions = [] +1743 while True: +1744 expressions.append(self._parse_cte()) +1745 +1746 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): +1747 break +1748 else: +1749 self._match(TokenType.WITH) +1750 +1751 return self.expression(exp.With, expressions=expressions, recursive=recursive) +1752 +1753 def _parse_cte(self) -> exp.Expression: +1754 alias = self._parse_table_alias() +1755 if not alias or not alias.this: +1756 self.raise_error("Expected CTE to have alias") +1757 +1758 self._match(TokenType.ALIAS) 1759 -1760 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: -1761 if not isinstance(this, self.MODIFIABLES): -1762 return -1763 -1764 table = isinstance(this, exp.Table) +1760 return self.expression( +1761 exp.CTE, +1762 this=self._parse_wrapped(self._parse_statement), +1763 alias=alias, +1764 ) 1765 -1766 while True: -1767 lateral = self._parse_lateral() -1768 join = self._parse_join() -1769 comma = None if table else self._match(TokenType.COMMA) -1770 if lateral: -1771 this.append("laterals", lateral) -1772 if join: -1773 this.append("joins", join) -1774 if comma: -1775 this.args["from"].append("expressions", self._parse_table()) -1776 if not (lateral or join or comma): -1777 break -1778 -1779 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): -1780 expression = parser(self) -1781 -1782 if expression: -1783 this.set(key, expression) -1784 -1785 def _parse_hint(self) -> t.Optional[exp.Expression]: -1786 if self._match(TokenType.HINT): -1787 hints = self._parse_csv(self._parse_function) -1788 if not self._match_pair(TokenType.STAR, TokenType.SLASH): -1789 self.raise_error("Expected */ after HINT") -1790 return self.expression(exp.Hint, expressions=hints) -1791 -1792 return None -1793 -1794 def _parse_into(self) -> t.Optional[exp.Expression]: -1795 if not self._match(TokenType.INTO): -1796 return None -1797 -1798 temp = self._match(TokenType.TEMPORARY) -1799 unlogged = self._match(TokenType.UNLOGGED) -1800 self._match(TokenType.TABLE) +1766 def _parse_table_alias( +1767 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None +1768 ) -> t.Optional[exp.Expression]: +1769 any_token = self._match(TokenType.ALIAS) +1770 alias = self._parse_id_var( +1771 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS +1772 ) +1773 index = self._index +1774 +1775 if self._match(TokenType.L_PAREN): +1776 columns = self._parse_csv(self._parse_function_parameter) +1777 self._match_r_paren() if columns else self._retreat(index) +1778 else: +1779 columns = None +1780 +1781 if not alias and not columns: +1782 return None +1783 +1784 return self.expression(exp.TableAlias, this=alias, columns=columns) +1785 +1786 def _parse_subquery( +1787 self, this: t.Optional[exp.Expression], parse_alias: bool = True +1788 ) -> exp.Expression: +1789 return self.expression( +1790 exp.Subquery, +1791 this=this, +1792 pivots=self._parse_pivots(), +1793 alias=self._parse_table_alias() if parse_alias else None, +1794 ) +1795 +1796 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: +1797 if not isinstance(this, self.MODIFIABLES): +1798 return +1799 +1800 table = isinstance(this, exp.Table) 1801 -1802 return self.expression( -1803 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged -1804 ) -1805 -1806 def _parse_from(self) -> t.Optional[exp.Expression]: -1807 if not self._match(TokenType.FROM): -1808 return None -1809 -1810 return self.expression( -1811 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) -1812 ) -1813 -1814 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: -1815 if not self._match(TokenType.MATCH_RECOGNIZE): -1816 return None -1817 self._match_l_paren() -1818 -1819 partition = self._parse_partition_by() -1820 order = self._parse_order() -1821 measures = ( -1822 self._parse_alias(self._parse_conjunction()) -1823 if self._match_text_seq("MEASURES") -1824 else None -1825 ) -1826 -1827 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): -1828 rows = exp.Var(this="ONE ROW PER MATCH") -1829 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): -1830 text = "ALL ROWS PER MATCH" -1831 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): -1832 text += f" SHOW EMPTY MATCHES" -1833 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): -1834 text += f" OMIT EMPTY MATCHES" -1835 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): -1836 text += f" WITH UNMATCHED ROWS" -1837 rows = exp.Var(this=text) -1838 else: -1839 rows = None -1840 -1841 if self._match_text_seq("AFTER", "MATCH", "SKIP"): -1842 text = "AFTER MATCH SKIP" -1843 if self._match_text_seq("PAST", "LAST", "ROW"): -1844 text += f" PAST LAST ROW" -1845 elif self._match_text_seq("TO", "NEXT", "ROW"): -1846 text += f" TO NEXT ROW" -1847 elif self._match_text_seq("TO", "FIRST"): -1848 text += f" TO FIRST {self._advance_any().text}" # type: ignore -1849 elif self._match_text_seq("TO", "LAST"): -1850 text += f" TO LAST {self._advance_any().text}" # type: ignore -1851 after = exp.Var(this=text) -1852 else: -1853 after = None +1802 while True: +1803 lateral = self._parse_lateral() +1804 join = self._parse_join() +1805 comma = None if table else self._match(TokenType.COMMA) +1806 if lateral: +1807 this.append("laterals", lateral) +1808 if join: +1809 this.append("joins", join) +1810 if comma: +1811 this.args["from"].append("expressions", self._parse_table()) +1812 if not (lateral or join or comma): +1813 break +1814 +1815 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): +1816 expression = parser(self) +1817 +1818 if expression: +1819 this.set(key, expression) +1820 +1821 def _parse_hint(self) -> t.Optional[exp.Expression]: +1822 if self._match(TokenType.HINT): +1823 hints = self._parse_csv(self._parse_function) +1824 if not self._match_pair(TokenType.STAR, TokenType.SLASH): +1825 self.raise_error("Expected */ after HINT") +1826 return self.expression(exp.Hint, expressions=hints) +1827 +1828 return None +1829 +1830 def _parse_into(self) -> t.Optional[exp.Expression]: +1831 if not self._match(TokenType.INTO): +1832 return None +1833 +1834 temp = self._match(TokenType.TEMPORARY) +1835 unlogged = self._match(TokenType.UNLOGGED) +1836 self._match(TokenType.TABLE) +1837 +1838 return self.expression( +1839 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged +1840 ) +1841 +1842 def _parse_from(self) -> t.Optional[exp.Expression]: +1843 if not self._match(TokenType.FROM): +1844 return None +1845 +1846 return self.expression( +1847 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) +1848 ) +1849 +1850 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: +1851 if not self._match(TokenType.MATCH_RECOGNIZE): +1852 return None +1853 self._match_l_paren() 1854 -1855 if self._match_text_seq("PATTERN"): -1856 self._match_l_paren() -1857 -1858 if not self._curr: -1859 self.raise_error("Expecting )", self._curr) -1860 -1861 paren = 1 -1862 start = self._curr -1863 -1864 while self._curr and paren > 0: -1865 if self._curr.token_type == TokenType.L_PAREN: -1866 paren += 1 -1867 if self._curr.token_type == TokenType.R_PAREN: -1868 paren -= 1 -1869 end = self._prev -1870 self._advance() -1871 if paren > 0: -1872 self.raise_error("Expecting )", self._curr) -1873 pattern = exp.Var(this=self._find_sql(start, end)) +1855 partition = self._parse_partition_by() +1856 order = self._parse_order() +1857 measures = ( +1858 self._parse_alias(self._parse_conjunction()) +1859 if self._match_text_seq("MEASURES") +1860 else None +1861 ) +1862 +1863 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): +1864 rows = exp.Var(this="ONE ROW PER MATCH") +1865 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): +1866 text = "ALL ROWS PER MATCH" +1867 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): +1868 text += f" SHOW EMPTY MATCHES" +1869 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): +1870 text += f" OMIT EMPTY MATCHES" +1871 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): +1872 text += f" WITH UNMATCHED ROWS" +1873 rows = exp.Var(this=text) 1874 else: -1875 pattern = None +1875 rows = None 1876 -1877 define = ( -1878 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None -1879 ) -1880 self._match_r_paren() -1881 -1882 return self.expression( -1883 exp.MatchRecognize, -1884 partition_by=partition, -1885 order=order, -1886 measures=measures, -1887 rows=rows, -1888 after=after, -1889 pattern=pattern, -1890 define=define, -1891 ) -1892 -1893 def _parse_lateral(self) -> t.Optional[exp.Expression]: -1894 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) -1895 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) +1877 if self._match_text_seq("AFTER", "MATCH", "SKIP"): +1878 text = "AFTER MATCH SKIP" +1879 if self._match_text_seq("PAST", "LAST", "ROW"): +1880 text += f" PAST LAST ROW" +1881 elif self._match_text_seq("TO", "NEXT", "ROW"): +1882 text += f" TO NEXT ROW" +1883 elif self._match_text_seq("TO", "FIRST"): +1884 text += f" TO FIRST {self._advance_any().text}" # type: ignore +1885 elif self._match_text_seq("TO", "LAST"): +1886 text += f" TO LAST {self._advance_any().text}" # type: ignore +1887 after = exp.Var(this=text) +1888 else: +1889 after = None +1890 +1891 if self._match_text_seq("PATTERN"): +1892 self._match_l_paren() +1893 +1894 if not self._curr: +1895 self.raise_error("Expecting )", self._curr) 1896 -1897 if outer_apply or cross_apply: -1898 this = self._parse_select(table=True) -1899 view = None -1900 outer = not cross_apply -1901 elif self._match(TokenType.LATERAL): -1902 this = self._parse_select(table=True) -1903 view = self._match(TokenType.VIEW) -1904 outer = self._match(TokenType.OUTER) -1905 else: -1906 return None -1907 -1908 if not this: -1909 this = self._parse_function() or self._parse_id_var(any_token=False) -1910 while self._match(TokenType.DOT): -1911 this = exp.Dot( -1912 this=this, -1913 expression=self._parse_function() or self._parse_id_var(any_token=False), -1914 ) -1915 -1916 table_alias: t.Optional[exp.Expression] +1897 paren = 1 +1898 start = self._curr +1899 +1900 while self._curr and paren > 0: +1901 if self._curr.token_type == TokenType.L_PAREN: +1902 paren += 1 +1903 if self._curr.token_type == TokenType.R_PAREN: +1904 paren -= 1 +1905 end = self._prev +1906 self._advance() +1907 if paren > 0: +1908 self.raise_error("Expecting )", self._curr) +1909 pattern = exp.Var(this=self._find_sql(start, end)) +1910 else: +1911 pattern = None +1912 +1913 define = ( +1914 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None +1915 ) +1916 self._match_r_paren() 1917 -1918 if view: -1919 table = self._parse_id_var(any_token=False) -1920 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] -1921 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) -1922 else: -1923 table_alias = self._parse_table_alias() -1924 -1925 expression = self.expression( -1926 exp.Lateral, -1927 this=this, -1928 view=view, -1929 outer=outer, -1930 alias=table_alias, -1931 ) +1918 return self.expression( +1919 exp.MatchRecognize, +1920 partition_by=partition, +1921 order=order, +1922 measures=measures, +1923 rows=rows, +1924 after=after, +1925 pattern=pattern, +1926 define=define, +1927 ) +1928 +1929 def _parse_lateral(self) -> t.Optional[exp.Expression]: +1930 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) +1931 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1932 1933 if outer_apply or cross_apply: -1934 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") -1935 -1936 return expression -1937 -1938 def _parse_join_side_and_kind( -1939 self, -1940 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: -1941 return ( -1942 self._match(TokenType.NATURAL) and self._prev, -1943 self._match_set(self.JOIN_SIDES) and self._prev, -1944 self._match_set(self.JOIN_KINDS) and self._prev, -1945 ) -1946 -1947 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: -1948 natural, side, kind = self._parse_join_side_and_kind() -1949 -1950 if not skip_join_token and not self._match(TokenType.JOIN): -1951 return None -1952 -1953 kwargs: t.Dict[ -1954 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] -1955 ] = {"this": self._parse_table()} -1956 -1957 if natural: -1958 kwargs["natural"] = True -1959 if side: -1960 kwargs["side"] = side.text -1961 if kind: -1962 kwargs["kind"] = kind.text -1963 -1964 if self._match(TokenType.ON): -1965 kwargs["on"] = self._parse_conjunction() -1966 elif self._match(TokenType.USING): -1967 kwargs["using"] = self._parse_wrapped_id_vars() +1934 this = self._parse_select(table=True) +1935 view = None +1936 outer = not cross_apply +1937 elif self._match(TokenType.LATERAL): +1938 this = self._parse_select(table=True) +1939 view = self._match(TokenType.VIEW) +1940 outer = self._match(TokenType.OUTER) +1941 else: +1942 return None +1943 +1944 if not this: +1945 this = self._parse_function() or self._parse_id_var(any_token=False) +1946 while self._match(TokenType.DOT): +1947 this = exp.Dot( +1948 this=this, +1949 expression=self._parse_function() or self._parse_id_var(any_token=False), +1950 ) +1951 +1952 table_alias: t.Optional[exp.Expression] +1953 +1954 if view: +1955 table = self._parse_id_var(any_token=False) +1956 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] +1957 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) +1958 else: +1959 table_alias = self._parse_table_alias() +1960 +1961 expression = self.expression( +1962 exp.Lateral, +1963 this=this, +1964 view=view, +1965 outer=outer, +1966 alias=table_alias, +1967 ) 1968 -1969 return self.expression(exp.Join, **kwargs) # type: ignore -1970 -1971 def _parse_index(self) -> exp.Expression: -1972 index = self._parse_id_var() -1973 self._match(TokenType.ON) -1974 self._match(TokenType.TABLE) # hive -1975 -1976 return self.expression( -1977 exp.Index, -1978 this=index, -1979 table=self.expression(exp.Table, this=self._parse_id_var()), -1980 columns=self._parse_expression(), +1969 if outer_apply or cross_apply: +1970 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") +1971 +1972 return expression +1973 +1974 def _parse_join_side_and_kind( +1975 self, +1976 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: +1977 return ( +1978 self._match(TokenType.NATURAL) and self._prev, +1979 self._match_set(self.JOIN_SIDES) and self._prev, +1980 self._match_set(self.JOIN_KINDS) and self._prev, 1981 ) 1982 -1983 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: -1984 unique = self._match(TokenType.UNIQUE) -1985 primary = self._match_text_seq("PRIMARY") -1986 amp = self._match_text_seq("AMP") -1987 if not self._match(TokenType.INDEX): -1988 return None -1989 index = self._parse_id_var() -1990 columns = None -1991 if self._match(TokenType.L_PAREN, advance=False): -1992 columns = self._parse_wrapped_csv(self._parse_column) -1993 return self.expression( -1994 exp.Index, -1995 this=index, -1996 columns=columns, -1997 unique=unique, -1998 primary=primary, -1999 amp=amp, -2000 ) -2001 -2002 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: -2003 catalog = None -2004 db = None -2005 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) +1983 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: +1984 natural, side, kind = self._parse_join_side_and_kind() +1985 +1986 if not skip_join_token and not self._match(TokenType.JOIN): +1987 return None +1988 +1989 kwargs: t.Dict[ +1990 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] +1991 ] = {"this": self._parse_table()} +1992 +1993 if natural: +1994 kwargs["natural"] = True +1995 if side: +1996 kwargs["side"] = side.text +1997 if kind: +1998 kwargs["kind"] = kind.text +1999 +2000 if self._match(TokenType.ON): +2001 kwargs["on"] = self._parse_conjunction() +2002 elif self._match(TokenType.USING): +2003 kwargs["using"] = self._parse_wrapped_id_vars() +2004 +2005 return self.expression(exp.Join, **kwargs) # type: ignore 2006 -2007 while self._match(TokenType.DOT): -2008 if catalog: -2009 # This allows nesting the table in arbitrarily many dot expressions if needed -2010 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) -2011 else: -2012 catalog = db -2013 db = table -2014 table = self._parse_id_var() -2015 -2016 if not table: -2017 self.raise_error(f"Expected table name but got {self._curr}") +2007 def _parse_index(self) -> exp.Expression: +2008 index = self._parse_id_var() +2009 self._match(TokenType.ON) +2010 self._match(TokenType.TABLE) # hive +2011 +2012 return self.expression( +2013 exp.Index, +2014 this=index, +2015 table=self.expression(exp.Table, this=self._parse_id_var()), +2016 columns=self._parse_expression(), +2017 ) 2018 -2019 return self.expression( -2020 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() -2021 ) -2022 -2023 def _parse_table( -2024 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None -2025 ) -> t.Optional[exp.Expression]: -2026 lateral = self._parse_lateral() -2027 -2028 if lateral: -2029 return lateral -2030 -2031 unnest = self._parse_unnest() -2032 -2033 if unnest: -2034 return unnest -2035 -2036 values = self._parse_derived_table_values() +2019 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: +2020 unique = self._match(TokenType.UNIQUE) +2021 primary = self._match_text_seq("PRIMARY") +2022 amp = self._match_text_seq("AMP") +2023 if not self._match(TokenType.INDEX): +2024 return None +2025 index = self._parse_id_var() +2026 columns = None +2027 if self._match(TokenType.L_PAREN, advance=False): +2028 columns = self._parse_wrapped_csv(self._parse_column) +2029 return self.expression( +2030 exp.Index, +2031 this=index, +2032 columns=columns, +2033 unique=unique, +2034 primary=primary, +2035 amp=amp, +2036 ) 2037 -2038 if values: -2039 return values -2040 -2041 subquery = self._parse_select(table=True) +2038 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: +2039 catalog = None +2040 db = None +2041 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 2042 -2043 if subquery: -2044 return subquery -2045 -2046 this = self._parse_table_parts(schema=schema) -2047 -2048 if schema: -2049 return self._parse_schema(this=this) -2050 -2051 if self.alias_post_tablesample: -2052 table_sample = self._parse_table_sample() -2053 -2054 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) -2055 -2056 if alias: -2057 this.set("alias", alias) +2043 while self._match(TokenType.DOT): +2044 if catalog: +2045 # This allows nesting the table in arbitrarily many dot expressions if needed +2046 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) +2047 else: +2048 catalog = db +2049 db = table +2050 table = self._parse_id_var() +2051 +2052 if not table: +2053 self.raise_error(f"Expected table name but got {self._curr}") +2054 +2055 return self.expression( +2056 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() +2057 ) 2058 -2059 if not this.args.get("pivots"): -2060 this.set("pivots", self._parse_pivots()) -2061 -2062 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): -2063 this.set( -2064 "hints", -2065 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), -2066 ) -2067 self._match_r_paren() +2059 def _parse_table( +2060 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None +2061 ) -> t.Optional[exp.Expression]: +2062 lateral = self._parse_lateral() +2063 +2064 if lateral: +2065 return lateral +2066 +2067 unnest = self._parse_unnest() 2068 -2069 if not self.alias_post_tablesample: -2070 table_sample = self._parse_table_sample() +2069 if unnest: +2070 return unnest 2071 -2072 if table_sample: -2073 table_sample.set("this", this) -2074 this = table_sample -2075 -2076 return this -2077 -2078 def _parse_unnest(self) -> t.Optional[exp.Expression]: -2079 if not self._match(TokenType.UNNEST): -2080 return None +2072 values = self._parse_derived_table_values() +2073 +2074 if values: +2075 return values +2076 +2077 subquery = self._parse_select(table=True) +2078 +2079 if subquery: +2080 return subquery 2081 -2082 expressions = self._parse_wrapped_csv(self._parse_column) -2083 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) -2084 alias = self._parse_table_alias() -2085 -2086 if alias and self.unnest_column_only: -2087 if alias.args.get("columns"): -2088 self.raise_error("Unexpected extra column alias in unnest.") -2089 alias.set("columns", [alias.this]) -2090 alias.set("this", None) +2082 this = self._parse_table_parts(schema=schema) +2083 +2084 if schema: +2085 return self._parse_schema(this=this) +2086 +2087 if self.alias_post_tablesample: +2088 table_sample = self._parse_table_sample() +2089 +2090 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2091 -2092 offset = None -2093 if self._match_pair(TokenType.WITH, TokenType.OFFSET): -2094 self._match(TokenType.ALIAS) -2095 offset = self._parse_conjunction() -2096 -2097 return self.expression( -2098 exp.Unnest, -2099 expressions=expressions, -2100 ordinality=ordinality, -2101 alias=alias, -2102 offset=offset, -2103 ) +2092 if alias: +2093 this.set("alias", alias) +2094 +2095 if not this.args.get("pivots"): +2096 this.set("pivots", self._parse_pivots()) +2097 +2098 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): +2099 this.set( +2100 "hints", +2101 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), +2102 ) +2103 self._match_r_paren() 2104 -2105 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: -2106 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) -2107 if not is_derived and not self._match(TokenType.VALUES): -2108 return None -2109 -2110 expressions = self._parse_csv(self._parse_value) +2105 if not self.alias_post_tablesample: +2106 table_sample = self._parse_table_sample() +2107 +2108 if table_sample: +2109 table_sample.set("this", this) +2110 this = table_sample 2111 -2112 if is_derived: -2113 self._match_r_paren() -2114 -2115 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) -2116 -2117 def _parse_table_sample(self) -> t.Optional[exp.Expression]: -2118 if not self._match(TokenType.TABLE_SAMPLE): -2119 return None -2120 -2121 method = self._parse_var() -2122 bucket_numerator = None -2123 bucket_denominator = None -2124 bucket_field = None -2125 percent = None -2126 rows = None -2127 size = None -2128 seed = None -2129 -2130 self._match_l_paren() -2131 -2132 if self._match(TokenType.BUCKET): -2133 bucket_numerator = self._parse_number() -2134 self._match(TokenType.OUT_OF) -2135 bucket_denominator = bucket_denominator = self._parse_number() -2136 self._match(TokenType.ON) -2137 bucket_field = self._parse_field() -2138 else: -2139 num = self._parse_number() +2112 return this +2113 +2114 def _parse_unnest(self) -> t.Optional[exp.Expression]: +2115 if not self._match(TokenType.UNNEST): +2116 return None +2117 +2118 expressions = self._parse_wrapped_csv(self._parse_column) +2119 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) +2120 alias = self._parse_table_alias() +2121 +2122 if alias and self.unnest_column_only: +2123 if alias.args.get("columns"): +2124 self.raise_error("Unexpected extra column alias in unnest.") +2125 alias.set("columns", [alias.this]) +2126 alias.set("this", None) +2127 +2128 offset = None +2129 if self._match_pair(TokenType.WITH, TokenType.OFFSET): +2130 self._match(TokenType.ALIAS) +2131 offset = self._parse_conjunction() +2132 +2133 return self.expression( +2134 exp.Unnest, +2135 expressions=expressions, +2136 ordinality=ordinality, +2137 alias=alias, +2138 offset=offset, +2139 ) 2140 -2141 if self._match(TokenType.PERCENT): -2142 percent = num -2143 elif self._match(TokenType.ROWS): -2144 rows = num -2145 else: -2146 size = num +2141 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: +2142 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) +2143 if not is_derived and not self._match(TokenType.VALUES): +2144 return None +2145 +2146 expressions = self._parse_csv(self._parse_value) 2147 -2148 self._match_r_paren() -2149 -2150 if self._match(TokenType.SEED): -2151 seed = self._parse_wrapped(self._parse_number) +2148 if is_derived: +2149 self._match_r_paren() +2150 +2151 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2152 -2153 return self.expression( -2154 exp.TableSample, -2155 method=method, -2156 bucket_numerator=bucket_numerator, -2157 bucket_denominator=bucket_denominator, -2158 bucket_field=bucket_field, -2159 percent=percent, -2160 rows=rows, -2161 size=size, -2162 seed=seed, -2163 ) -2164 -2165 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: -2166 return list(iter(self._parse_pivot, None)) +2153 def _parse_table_sample(self) -> t.Optional[exp.Expression]: +2154 if not self._match(TokenType.TABLE_SAMPLE): +2155 return None +2156 +2157 method = self._parse_var() +2158 bucket_numerator = None +2159 bucket_denominator = None +2160 bucket_field = None +2161 percent = None +2162 rows = None +2163 size = None +2164 seed = None +2165 +2166 self._match_l_paren() 2167 -2168 def _parse_pivot(self) -> t.Optional[exp.Expression]: -2169 index = self._index -2170 -2171 if self._match(TokenType.PIVOT): -2172 unpivot = False -2173 elif self._match(TokenType.UNPIVOT): -2174 unpivot = True -2175 else: -2176 return None -2177 -2178 expressions = [] -2179 field = None -2180 -2181 if not self._match(TokenType.L_PAREN): -2182 self._retreat(index) -2183 return None -2184 -2185 if unpivot: -2186 expressions = self._parse_csv(self._parse_column) -2187 else: -2188 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) -2189 -2190 if not self._match(TokenType.FOR): -2191 self.raise_error("Expecting FOR") -2192 -2193 value = self._parse_column() -2194 -2195 if not self._match(TokenType.IN): -2196 self.raise_error("Expecting IN") -2197 -2198 field = self._parse_in(value) -2199 -2200 self._match_r_paren() -2201 -2202 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) +2168 if self._match(TokenType.BUCKET): +2169 bucket_numerator = self._parse_number() +2170 self._match(TokenType.OUT_OF) +2171 bucket_denominator = bucket_denominator = self._parse_number() +2172 self._match(TokenType.ON) +2173 bucket_field = self._parse_field() +2174 else: +2175 num = self._parse_number() +2176 +2177 if self._match(TokenType.PERCENT): +2178 percent = num +2179 elif self._match(TokenType.ROWS): +2180 rows = num +2181 else: +2182 size = num +2183 +2184 self._match_r_paren() +2185 +2186 if self._match(TokenType.SEED): +2187 seed = self._parse_wrapped(self._parse_number) +2188 +2189 return self.expression( +2190 exp.TableSample, +2191 method=method, +2192 bucket_numerator=bucket_numerator, +2193 bucket_denominator=bucket_denominator, +2194 bucket_field=bucket_field, +2195 percent=percent, +2196 rows=rows, +2197 size=size, +2198 seed=seed, +2199 ) +2200 +2201 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: +2202 return list(iter(self._parse_pivot, None)) 2203 -2204 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): -2205 pivot.set("alias", self._parse_table_alias()) +2204 def _parse_pivot(self) -> t.Optional[exp.Expression]: +2205 index = self._index 2206 -2207 return pivot -2208 -2209 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: -2210 if not skip_where_token and not self._match(TokenType.WHERE): -2211 return None -2212 -2213 return self.expression( -2214 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() -2215 ) +2207 if self._match(TokenType.PIVOT): +2208 unpivot = False +2209 elif self._match(TokenType.UNPIVOT): +2210 unpivot = True +2211 else: +2212 return None +2213 +2214 expressions = [] +2215 field = None 2216 -2217 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: -2218 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): +2217 if not self._match(TokenType.L_PAREN): +2218 self._retreat(index) 2219 return None 2220 -2221 elements = defaultdict(list) -2222 -2223 while True: -2224 expressions = self._parse_csv(self._parse_conjunction) -2225 if expressions: -2226 elements["expressions"].extend(expressions) -2227 -2228 grouping_sets = self._parse_grouping_sets() -2229 if grouping_sets: -2230 elements["grouping_sets"].extend(grouping_sets) -2231 -2232 rollup = None -2233 cube = None -2234 -2235 with_ = self._match(TokenType.WITH) -2236 if self._match(TokenType.ROLLUP): -2237 rollup = with_ or self._parse_wrapped_csv(self._parse_column) -2238 elements["rollup"].extend(ensure_list(rollup)) +2221 if unpivot: +2222 expressions = self._parse_csv(self._parse_column) +2223 else: +2224 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) +2225 +2226 if not self._match(TokenType.FOR): +2227 self.raise_error("Expecting FOR") +2228 +2229 value = self._parse_column() +2230 +2231 if not self._match(TokenType.IN): +2232 self.raise_error("Expecting IN") +2233 +2234 field = self._parse_in(value) +2235 +2236 self._match_r_paren() +2237 +2238 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2239 -2240 if self._match(TokenType.CUBE): -2241 cube = with_ or self._parse_wrapped_csv(self._parse_column) -2242 elements["cube"].extend(ensure_list(cube)) -2243 -2244 if not (expressions or grouping_sets or rollup or cube): -2245 break -2246 -2247 return self.expression(exp.Group, **elements) # type: ignore +2240 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): +2241 pivot.set("alias", self._parse_table_alias()) +2242 +2243 return pivot +2244 +2245 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: +2246 if not skip_where_token and not self._match(TokenType.WHERE): +2247 return None 2248 -2249 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -2250 if not self._match(TokenType.GROUPING_SETS): -2251 return None +2249 return self.expression( +2250 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() +2251 ) 2252 -2253 return self._parse_wrapped_csv(self._parse_grouping_set) -2254 -2255 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: -2256 if self._match(TokenType.L_PAREN): -2257 grouping_set = self._parse_csv(self._parse_column) -2258 self._match_r_paren() -2259 return self.expression(exp.Tuple, expressions=grouping_set) -2260 -2261 return self._parse_column() -2262 -2263 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: -2264 if not skip_having_token and not self._match(TokenType.HAVING): -2265 return None -2266 return self.expression(exp.Having, this=self._parse_conjunction()) +2253 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: +2254 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): +2255 return None +2256 +2257 elements = defaultdict(list) +2258 +2259 while True: +2260 expressions = self._parse_csv(self._parse_conjunction) +2261 if expressions: +2262 elements["expressions"].extend(expressions) +2263 +2264 grouping_sets = self._parse_grouping_sets() +2265 if grouping_sets: +2266 elements["grouping_sets"].extend(grouping_sets) 2267 -2268 def _parse_qualify(self) -> t.Optional[exp.Expression]: -2269 if not self._match(TokenType.QUALIFY): -2270 return None -2271 return self.expression(exp.Qualify, this=self._parse_conjunction()) -2272 -2273 def _parse_order( -2274 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False -2275 ) -> t.Optional[exp.Expression]: -2276 if not skip_order_token and not self._match(TokenType.ORDER_BY): -2277 return this -2278 -2279 return self.expression( -2280 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) -2281 ) +2268 rollup = None +2269 cube = None +2270 +2271 with_ = self._match(TokenType.WITH) +2272 if self._match(TokenType.ROLLUP): +2273 rollup = with_ or self._parse_wrapped_csv(self._parse_column) +2274 elements["rollup"].extend(ensure_list(rollup)) +2275 +2276 if self._match(TokenType.CUBE): +2277 cube = with_ or self._parse_wrapped_csv(self._parse_column) +2278 elements["cube"].extend(ensure_list(cube)) +2279 +2280 if not (expressions or grouping_sets or rollup or cube): +2281 break 2282 -2283 def _parse_sort( -2284 self, token_type: TokenType, exp_class: t.Type[exp.Expression] -2285 ) -> t.Optional[exp.Expression]: -2286 if not self._match(token_type): +2283 return self.expression(exp.Group, **elements) # type: ignore +2284 +2285 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +2286 if not self._match(TokenType.GROUPING_SETS): 2287 return None -2288 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) -2289 -2290 def _parse_ordered(self) -> exp.Expression: -2291 this = self._parse_conjunction() -2292 self._match(TokenType.ASC) -2293 is_desc = self._match(TokenType.DESC) -2294 is_nulls_first = self._match(TokenType.NULLS_FIRST) -2295 is_nulls_last = self._match(TokenType.NULLS_LAST) -2296 desc = is_desc or False -2297 asc = not desc -2298 nulls_first = is_nulls_first or False -2299 explicitly_null_ordered = is_nulls_first or is_nulls_last -2300 if ( -2301 not explicitly_null_ordered -2302 and ( -2303 (asc and self.null_ordering == "nulls_are_small") -2304 or (desc and self.null_ordering != "nulls_are_small") -2305 ) -2306 and self.null_ordering != "nulls_are_last" -2307 ): -2308 nulls_first = True -2309 -2310 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) -2311 -2312 def _parse_limit( -2313 self, this: t.Optional[exp.Expression] = None, top: bool = False -2314 ) -> t.Optional[exp.Expression]: -2315 if self._match(TokenType.TOP if top else TokenType.LIMIT): -2316 limit_paren = self._match(TokenType.L_PAREN) -2317 limit_exp = self.expression( -2318 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() -2319 ) -2320 -2321 if limit_paren: -2322 self._match_r_paren() -2323 -2324 return limit_exp +2288 +2289 return self._parse_wrapped_csv(self._parse_grouping_set) +2290 +2291 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: +2292 if self._match(TokenType.L_PAREN): +2293 grouping_set = self._parse_csv(self._parse_column) +2294 self._match_r_paren() +2295 return self.expression(exp.Tuple, expressions=grouping_set) +2296 +2297 return self._parse_column() +2298 +2299 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: +2300 if not skip_having_token and not self._match(TokenType.HAVING): +2301 return None +2302 return self.expression(exp.Having, this=self._parse_conjunction()) +2303 +2304 def _parse_qualify(self) -> t.Optional[exp.Expression]: +2305 if not self._match(TokenType.QUALIFY): +2306 return None +2307 return self.expression(exp.Qualify, this=self._parse_conjunction()) +2308 +2309 def _parse_order( +2310 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False +2311 ) -> t.Optional[exp.Expression]: +2312 if not skip_order_token and not self._match(TokenType.ORDER_BY): +2313 return this +2314 +2315 return self.expression( +2316 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) +2317 ) +2318 +2319 def _parse_sort( +2320 self, token_type: TokenType, exp_class: t.Type[exp.Expression] +2321 ) -> t.Optional[exp.Expression]: +2322 if not self._match(token_type): +2323 return None +2324 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2325 -2326 if self._match(TokenType.FETCH): -2327 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) -2328 direction = self._prev.text if direction else "FIRST" -2329 count = self._parse_number() -2330 self._match_set((TokenType.ROW, TokenType.ROWS)) -2331 self._match(TokenType.ONLY) -2332 return self.expression(exp.Fetch, direction=direction, count=count) -2333 -2334 return this -2335 -2336 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: -2337 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): -2338 return this -2339 -2340 count = self._parse_number() -2341 self._match_set((TokenType.ROW, TokenType.ROWS)) -2342 return self.expression(exp.Offset, this=this, expression=count) -2343 -2344 def _parse_lock(self) -> t.Optional[exp.Expression]: -2345 if self._match_text_seq("FOR", "UPDATE"): -2346 return self.expression(exp.Lock, update=True) -2347 if self._match_text_seq("FOR", "SHARE"): -2348 return self.expression(exp.Lock, update=False) -2349 -2350 return None -2351 -2352 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2353 if not self._match_set(self.SET_OPERATIONS): -2354 return this -2355 -2356 token_type = self._prev.token_type -2357 -2358 if token_type == TokenType.UNION: -2359 expression = exp.Union -2360 elif token_type == TokenType.EXCEPT: -2361 expression = exp.Except -2362 else: -2363 expression = exp.Intersect -2364 -2365 return self.expression( -2366 expression, -2367 this=this, -2368 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), -2369 expression=self._parse_set_operations(self._parse_select(nested=True)), -2370 ) +2326 def _parse_ordered(self) -> exp.Expression: +2327 this = self._parse_conjunction() +2328 self._match(TokenType.ASC) +2329 is_desc = self._match(TokenType.DESC) +2330 is_nulls_first = self._match(TokenType.NULLS_FIRST) +2331 is_nulls_last = self._match(TokenType.NULLS_LAST) +2332 desc = is_desc or False +2333 asc = not desc +2334 nulls_first = is_nulls_first or False +2335 explicitly_null_ordered = is_nulls_first or is_nulls_last +2336 if ( +2337 not explicitly_null_ordered +2338 and ( +2339 (asc and self.null_ordering == "nulls_are_small") +2340 or (desc and self.null_ordering != "nulls_are_small") +2341 ) +2342 and self.null_ordering != "nulls_are_last" +2343 ): +2344 nulls_first = True +2345 +2346 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) +2347 +2348 def _parse_limit( +2349 self, this: t.Optional[exp.Expression] = None, top: bool = False +2350 ) -> t.Optional[exp.Expression]: +2351 if self._match(TokenType.TOP if top else TokenType.LIMIT): +2352 limit_paren = self._match(TokenType.L_PAREN) +2353 limit_exp = self.expression( +2354 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() +2355 ) +2356 +2357 if limit_paren: +2358 self._match_r_paren() +2359 +2360 return limit_exp +2361 +2362 if self._match(TokenType.FETCH): +2363 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) +2364 direction = self._prev.text if direction else "FIRST" +2365 count = self._parse_number() +2366 self._match_set((TokenType.ROW, TokenType.ROWS)) +2367 self._match(TokenType.ONLY) +2368 return self.expression(exp.Fetch, direction=direction, count=count) +2369 +2370 return this 2371 -2372 def _parse_expression(self) -> t.Optional[exp.Expression]: -2373 return self._parse_alias(self._parse_conjunction()) -2374 -2375 def _parse_conjunction(self) -> t.Optional[exp.Expression]: -2376 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) -2377 -2378 def _parse_equality(self) -> t.Optional[exp.Expression]: -2379 return self._parse_tokens(self._parse_comparison, self.EQUALITY) -2380 -2381 def _parse_comparison(self) -> t.Optional[exp.Expression]: -2382 return self._parse_tokens(self._parse_range, self.COMPARISON) -2383 -2384 def _parse_range(self) -> t.Optional[exp.Expression]: -2385 this = self._parse_bitwise() -2386 negate = self._match(TokenType.NOT) +2372 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: +2373 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): +2374 return this +2375 +2376 count = self._parse_number() +2377 self._match_set((TokenType.ROW, TokenType.ROWS)) +2378 return self.expression(exp.Offset, this=this, expression=count) +2379 +2380 def _parse_lock(self) -> t.Optional[exp.Expression]: +2381 if self._match_text_seq("FOR", "UPDATE"): +2382 return self.expression(exp.Lock, update=True) +2383 if self._match_text_seq("FOR", "SHARE"): +2384 return self.expression(exp.Lock, update=False) +2385 +2386 return None 2387 -2388 if self._match_set(self.RANGE_PARSERS): -2389 this = self.RANGE_PARSERS[self._prev.token_type](self, this) -2390 elif self._match(TokenType.ISNULL): -2391 this = self.expression(exp.Is, this=this, expression=exp.Null()) -2392 -2393 # Postgres supports ISNULL and NOTNULL for conditions. -2394 # https://blog.andreiavram.ro/postgresql-null-composite-type/ -2395 if self._match(TokenType.NOTNULL): -2396 this = self.expression(exp.Is, this=this, expression=exp.Null()) -2397 this = self.expression(exp.Not, this=this) -2398 -2399 if negate: -2400 this = self.expression(exp.Not, this=this) -2401 -2402 if self._match(TokenType.IS): -2403 this = self._parse_is(this) -2404 -2405 return this -2406 -2407 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: -2408 negate = self._match(TokenType.NOT) -2409 if self._match(TokenType.DISTINCT_FROM): -2410 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ -2411 return self.expression(klass, this=this, expression=self._parse_expression()) -2412 -2413 this = self.expression( -2414 exp.Is, -2415 this=this, -2416 expression=self._parse_null() or self._parse_boolean(), -2417 ) -2418 return self.expression(exp.Not, this=this) if negate else this +2388 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2389 if not self._match_set(self.SET_OPERATIONS): +2390 return this +2391 +2392 token_type = self._prev.token_type +2393 +2394 if token_type == TokenType.UNION: +2395 expression = exp.Union +2396 elif token_type == TokenType.EXCEPT: +2397 expression = exp.Except +2398 else: +2399 expression = exp.Intersect +2400 +2401 return self.expression( +2402 expression, +2403 this=this, +2404 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), +2405 expression=self._parse_set_operations(self._parse_select(nested=True)), +2406 ) +2407 +2408 def _parse_expression(self) -> t.Optional[exp.Expression]: +2409 return self._parse_alias(self._parse_conjunction()) +2410 +2411 def _parse_conjunction(self) -> t.Optional[exp.Expression]: +2412 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) +2413 +2414 def _parse_equality(self) -> t.Optional[exp.Expression]: +2415 return self._parse_tokens(self._parse_comparison, self.EQUALITY) +2416 +2417 def _parse_comparison(self) -> t.Optional[exp.Expression]: +2418 return self._parse_tokens(self._parse_range, self.COMPARISON) 2419 -2420 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: -2421 unnest = self._parse_unnest() -2422 if unnest: -2423 this = self.expression(exp.In, this=this, unnest=unnest) -2424 elif self._match(TokenType.L_PAREN): -2425 expressions = self._parse_csv(self._parse_select_or_expression) -2426 -2427 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): -2428 this = self.expression(exp.In, this=this, query=expressions[0]) -2429 else: -2430 this = self.expression(exp.In, this=this, expressions=expressions) -2431 -2432 self._match_r_paren() -2433 else: -2434 this = self.expression(exp.In, this=this, field=self._parse_field()) -2435 -2436 return this +2420 def _parse_range(self) -> t.Optional[exp.Expression]: +2421 this = self._parse_bitwise() +2422 negate = self._match(TokenType.NOT) +2423 +2424 if self._match_set(self.RANGE_PARSERS): +2425 this = self.RANGE_PARSERS[self._prev.token_type](self, this) +2426 elif self._match(TokenType.ISNULL): +2427 this = self.expression(exp.Is, this=this, expression=exp.Null()) +2428 +2429 # Postgres supports ISNULL and NOTNULL for conditions. +2430 # https://blog.andreiavram.ro/postgresql-null-composite-type/ +2431 if self._match(TokenType.NOTNULL): +2432 this = self.expression(exp.Is, this=this, expression=exp.Null()) +2433 this = self.expression(exp.Not, this=this) +2434 +2435 if negate: +2436 this = self.expression(exp.Not, this=this) 2437 -2438 def _parse_between(self, this: exp.Expression) -> exp.Expression: -2439 low = self._parse_bitwise() -2440 self._match(TokenType.AND) -2441 high = self._parse_bitwise() -2442 return self.expression(exp.Between, this=this, low=low, high=high) -2443 -2444 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2445 if not self._match(TokenType.ESCAPE): -2446 return this -2447 return self.expression(exp.Escape, this=this, expression=self._parse_string()) +2438 if self._match(TokenType.IS): +2439 this = self._parse_is(this) +2440 +2441 return this +2442 +2443 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: +2444 negate = self._match(TokenType.NOT) +2445 if self._match(TokenType.DISTINCT_FROM): +2446 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ +2447 return self.expression(klass, this=this, expression=self._parse_expression()) 2448 -2449 def _parse_bitwise(self) -> t.Optional[exp.Expression]: -2450 this = self._parse_term() -2451 -2452 while True: -2453 if self._match_set(self.BITWISE): -2454 this = self.expression( -2455 self.BITWISE[self._prev.token_type], -2456 this=this, -2457 expression=self._parse_term(), -2458 ) -2459 elif self._match_pair(TokenType.LT, TokenType.LT): -2460 this = self.expression( -2461 exp.BitwiseLeftShift, this=this, expression=self._parse_term() -2462 ) -2463 elif self._match_pair(TokenType.GT, TokenType.GT): -2464 this = self.expression( -2465 exp.BitwiseRightShift, this=this, expression=self._parse_term() -2466 ) -2467 else: -2468 break -2469 -2470 return this +2449 this = self.expression( +2450 exp.Is, +2451 this=this, +2452 expression=self._parse_null() or self._parse_boolean(), +2453 ) +2454 return self.expression(exp.Not, this=this) if negate else this +2455 +2456 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: +2457 unnest = self._parse_unnest() +2458 if unnest: +2459 this = self.expression(exp.In, this=this, unnest=unnest) +2460 elif self._match(TokenType.L_PAREN): +2461 expressions = self._parse_csv(self._parse_select_or_expression) +2462 +2463 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): +2464 this = self.expression(exp.In, this=this, query=expressions[0]) +2465 else: +2466 this = self.expression(exp.In, this=this, expressions=expressions) +2467 +2468 self._match_r_paren() +2469 else: +2470 this = self.expression(exp.In, this=this, field=self._parse_field()) 2471 -2472 def _parse_term(self) -> t.Optional[exp.Expression]: -2473 return self._parse_tokens(self._parse_factor, self.TERM) -2474 -2475 def _parse_factor(self) -> t.Optional[exp.Expression]: -2476 return self._parse_tokens(self._parse_unary, self.FACTOR) -2477 -2478 def _parse_unary(self) -> t.Optional[exp.Expression]: -2479 if self._match_set(self.UNARY_PARSERS): -2480 return self.UNARY_PARSERS[self._prev.token_type](self) -2481 return self._parse_at_time_zone(self._parse_type()) -2482 -2483 def _parse_type(self) -> t.Optional[exp.Expression]: -2484 if self._match(TokenType.INTERVAL): -2485 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) -2486 -2487 index = self._index -2488 type_token = self._parse_types(check_func=True) -2489 this = self._parse_column() -2490 -2491 if type_token: -2492 if this and not isinstance(this, exp.Star): -2493 return self.expression(exp.Cast, this=this, to=type_token) -2494 if not type_token.args.get("expressions"): -2495 self._retreat(index) -2496 return self._parse_column() -2497 return type_token -2498 -2499 return this -2500 -2501 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: -2502 index = self._index -2503 -2504 prefix = self._match_text_seq("SYSUDTLIB", ".") +2472 return this +2473 +2474 def _parse_between(self, this: exp.Expression) -> exp.Expression: +2475 low = self._parse_bitwise() +2476 self._match(TokenType.AND) +2477 high = self._parse_bitwise() +2478 return self.expression(exp.Between, this=this, low=low, high=high) +2479 +2480 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2481 if not self._match(TokenType.ESCAPE): +2482 return this +2483 return self.expression(exp.Escape, this=this, expression=self._parse_string()) +2484 +2485 def _parse_bitwise(self) -> t.Optional[exp.Expression]: +2486 this = self._parse_term() +2487 +2488 while True: +2489 if self._match_set(self.BITWISE): +2490 this = self.expression( +2491 self.BITWISE[self._prev.token_type], +2492 this=this, +2493 expression=self._parse_term(), +2494 ) +2495 elif self._match_pair(TokenType.LT, TokenType.LT): +2496 this = self.expression( +2497 exp.BitwiseLeftShift, this=this, expression=self._parse_term() +2498 ) +2499 elif self._match_pair(TokenType.GT, TokenType.GT): +2500 this = self.expression( +2501 exp.BitwiseRightShift, this=this, expression=self._parse_term() +2502 ) +2503 else: +2504 break 2505 -2506 if not self._match_set(self.TYPE_TOKENS): -2507 return None -2508 -2509 type_token = self._prev.token_type +2506 return this +2507 +2508 def _parse_term(self) -> t.Optional[exp.Expression]: +2509 return self._parse_tokens(self._parse_factor, self.TERM) 2510 -2511 if type_token == TokenType.PSEUDO_TYPE: -2512 return self.expression(exp.PseudoType, this=self._prev.text) +2511 def _parse_factor(self) -> t.Optional[exp.Expression]: +2512 return self._parse_tokens(self._parse_unary, self.FACTOR) 2513 -2514 nested = type_token in self.NESTED_TYPE_TOKENS -2515 is_struct = type_token == TokenType.STRUCT -2516 expressions = None -2517 maybe_func = False +2514 def _parse_unary(self) -> t.Optional[exp.Expression]: +2515 if self._match_set(self.UNARY_PARSERS): +2516 return self.UNARY_PARSERS[self._prev.token_type](self) +2517 return self._parse_at_time_zone(self._parse_type()) 2518 -2519 if self._match(TokenType.L_PAREN): -2520 if is_struct: -2521 expressions = self._parse_csv(self._parse_struct_kwargs) -2522 elif nested: -2523 expressions = self._parse_csv(self._parse_types) -2524 else: -2525 expressions = self._parse_csv(self._parse_conjunction) +2519 def _parse_type(self) -> t.Optional[exp.Expression]: +2520 if self._match(TokenType.INTERVAL): +2521 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) +2522 +2523 index = self._index +2524 type_token = self._parse_types(check_func=True) +2525 this = self._parse_column() 2526 -2527 if not expressions: -2528 self._retreat(index) -2529 return None -2530 -2531 self._match_r_paren() -2532 maybe_func = True -2533 -2534 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): -2535 this = exp.DataType( -2536 this=exp.DataType.Type.ARRAY, -2537 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], -2538 nested=True, -2539 ) -2540 -2541 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): -2542 this = exp.DataType( -2543 this=exp.DataType.Type.ARRAY, -2544 expressions=[this], -2545 nested=True, -2546 ) -2547 -2548 return this +2527 if type_token: +2528 if this and not isinstance(this, exp.Star): +2529 return self.expression(exp.Cast, this=this, to=type_token) +2530 if not type_token.args.get("expressions"): +2531 self._retreat(index) +2532 return self._parse_column() +2533 return type_token +2534 +2535 return this +2536 +2537 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: +2538 index = self._index +2539 +2540 prefix = self._match_text_seq("SYSUDTLIB", ".") +2541 +2542 if not self._match_set(self.TYPE_TOKENS): +2543 return None +2544 +2545 type_token = self._prev.token_type +2546 +2547 if type_token == TokenType.PSEUDO_TYPE: +2548 return self.expression(exp.PseudoType, this=self._prev.text) 2549 -2550 if self._match(TokenType.L_BRACKET): -2551 self._retreat(index) -2552 return None -2553 -2554 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None -2555 if nested and self._match(TokenType.LT): +2550 nested = type_token in self.NESTED_TYPE_TOKENS +2551 is_struct = type_token == TokenType.STRUCT +2552 expressions = None +2553 maybe_func = False +2554 +2555 if self._match(TokenType.L_PAREN): 2556 if is_struct: 2557 expressions = self._parse_csv(self._parse_struct_kwargs) -2558 else: +2558 elif nested: 2559 expressions = self._parse_csv(self._parse_types) -2560 -2561 if not self._match(TokenType.GT): -2562 self.raise_error("Expecting >") -2563 -2564 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): -2565 values = self._parse_csv(self._parse_conjunction) -2566 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) -2567 -2568 value: t.Optional[exp.Expression] = None -2569 if type_token in self.TIMESTAMPS: -2570 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: -2571 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) -2572 elif ( -2573 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ -2574 ): -2575 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) -2576 elif self._match(TokenType.WITHOUT_TIME_ZONE): -2577 if type_token == TokenType.TIME: -2578 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) -2579 else: -2580 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) -2581 -2582 maybe_func = maybe_func and value is None +2560 else: +2561 expressions = self._parse_csv(self._parse_conjunction) +2562 +2563 if not expressions: +2564 self._retreat(index) +2565 return None +2566 +2567 self._match_r_paren() +2568 maybe_func = True +2569 +2570 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): +2571 this = exp.DataType( +2572 this=exp.DataType.Type.ARRAY, +2573 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], +2574 nested=True, +2575 ) +2576 +2577 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): +2578 this = exp.DataType( +2579 this=exp.DataType.Type.ARRAY, +2580 expressions=[this], +2581 nested=True, +2582 ) 2583 -2584 if value is None: -2585 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) -2586 elif type_token == TokenType.INTERVAL: -2587 value = self.expression(exp.Interval, unit=self._parse_var()) -2588 -2589 if maybe_func and check_func: -2590 index2 = self._index -2591 peek = self._parse_string() -2592 -2593 if not peek: -2594 self._retreat(index) -2595 return None +2584 return this +2585 +2586 if self._match(TokenType.L_BRACKET): +2587 self._retreat(index) +2588 return None +2589 +2590 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None +2591 if nested and self._match(TokenType.LT): +2592 if is_struct: +2593 expressions = self._parse_csv(self._parse_struct_kwargs) +2594 else: +2595 expressions = self._parse_csv(self._parse_types) 2596 -2597 self._retreat(index2) -2598 -2599 if value: -2600 return value -2601 -2602 return exp.DataType( -2603 this=exp.DataType.Type[type_token.value.upper()], -2604 expressions=expressions, -2605 nested=nested, -2606 values=values, -2607 prefix=prefix, -2608 ) -2609 -2610 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: -2611 if self._curr and self._curr.token_type in self.TYPE_TOKENS: -2612 return self._parse_types() -2613 -2614 this = self._parse_id_var() -2615 self._match(TokenType.COLON) -2616 data_type = self._parse_types() +2597 if not self._match(TokenType.GT): +2598 self.raise_error("Expecting >") +2599 +2600 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): +2601 values = self._parse_csv(self._parse_conjunction) +2602 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) +2603 +2604 value: t.Optional[exp.Expression] = None +2605 if type_token in self.TIMESTAMPS: +2606 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: +2607 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) +2608 elif ( +2609 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ +2610 ): +2611 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) +2612 elif self._match(TokenType.WITHOUT_TIME_ZONE): +2613 if type_token == TokenType.TIME: +2614 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) +2615 else: +2616 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2617 -2618 if not data_type: -2619 return None -2620 return self.expression(exp.StructKwarg, this=this, expression=data_type) -2621 -2622 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2623 if not self._match(TokenType.AT_TIME_ZONE): -2624 return this -2625 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) -2626 -2627 def _parse_column(self) -> t.Optional[exp.Expression]: -2628 this = self._parse_field() -2629 if isinstance(this, exp.Identifier): -2630 this = self.expression(exp.Column, this=this) -2631 elif not this: -2632 return self._parse_bracket(this) -2633 this = self._parse_bracket(this) +2618 maybe_func = maybe_func and value is None +2619 +2620 if value is None: +2621 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) +2622 elif type_token == TokenType.INTERVAL: +2623 value = self.expression(exp.Interval, unit=self._parse_var()) +2624 +2625 if maybe_func and check_func: +2626 index2 = self._index +2627 peek = self._parse_string() +2628 +2629 if not peek: +2630 self._retreat(index) +2631 return None +2632 +2633 self._retreat(index2) 2634 -2635 while self._match_set(self.COLUMN_OPERATORS): -2636 op_token = self._prev.token_type -2637 op = self.COLUMN_OPERATORS.get(op_token) -2638 -2639 if op_token == TokenType.DCOLON: -2640 field = self._parse_types() -2641 if not field: -2642 self.raise_error("Expected type") -2643 elif op: -2644 self._advance() -2645 value = self._prev.text -2646 field = ( -2647 exp.Literal.number(value) -2648 if self._prev.token_type == TokenType.NUMBER -2649 else exp.Literal.string(value) -2650 ) -2651 else: -2652 field = self._parse_star() or self._parse_function() or self._parse_id_var() +2635 if value: +2636 return value +2637 +2638 return exp.DataType( +2639 this=exp.DataType.Type[type_token.value.upper()], +2640 expressions=expressions, +2641 nested=nested, +2642 values=values, +2643 prefix=prefix, +2644 ) +2645 +2646 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: +2647 if self._curr and self._curr.token_type in self.TYPE_TOKENS: +2648 return self._parse_types() +2649 +2650 this = self._parse_id_var() +2651 self._match(TokenType.COLON) +2652 data_type = self._parse_types() 2653 -2654 if isinstance(field, exp.Func): -2655 # bigquery allows function calls like x.y.count(...) -2656 # SAFE.SUBSTR(...) -2657 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules -2658 this = self._replace_columns_with_dots(this) -2659 -2660 if op: -2661 this = op(self, this, field) -2662 elif isinstance(this, exp.Column) and not this.args.get("catalog"): -2663 this = self.expression( -2664 exp.Column, -2665 this=field, -2666 table=this.this, -2667 db=this.args.get("table"), -2668 catalog=this.args.get("db"), -2669 ) -2670 else: -2671 this = self.expression(exp.Dot, this=this, expression=field) -2672 this = self._parse_bracket(this) -2673 -2674 return this -2675 -2676 def _parse_primary(self) -> t.Optional[exp.Expression]: -2677 if self._match_set(self.PRIMARY_PARSERS): -2678 token_type = self._prev.token_type -2679 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) -2680 -2681 if token_type == TokenType.STRING: -2682 expressions = [primary] -2683 while self._match(TokenType.STRING): -2684 expressions.append(exp.Literal.string(self._prev.text)) -2685 if len(expressions) > 1: -2686 return self.expression(exp.Concat, expressions=expressions) -2687 return primary -2688 -2689 if self._match_pair(TokenType.DOT, TokenType.NUMBER): -2690 return exp.Literal.number(f"0.{self._prev.text}") -2691 -2692 if self._match(TokenType.L_PAREN): -2693 comments = self._prev_comments -2694 query = self._parse_select() +2654 if not data_type: +2655 return None +2656 return self.expression(exp.StructKwarg, this=this, expression=data_type) +2657 +2658 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2659 if not self._match(TokenType.AT_TIME_ZONE): +2660 return this +2661 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) +2662 +2663 def _parse_column(self) -> t.Optional[exp.Expression]: +2664 this = self._parse_field() +2665 if isinstance(this, exp.Identifier): +2666 this = self.expression(exp.Column, this=this) +2667 elif not this: +2668 return self._parse_bracket(this) +2669 this = self._parse_bracket(this) +2670 +2671 while self._match_set(self.COLUMN_OPERATORS): +2672 op_token = self._prev.token_type +2673 op = self.COLUMN_OPERATORS.get(op_token) +2674 +2675 if op_token == TokenType.DCOLON: +2676 field = self._parse_types() +2677 if not field: +2678 self.raise_error("Expected type") +2679 elif op: +2680 self._advance() +2681 value = self._prev.text +2682 field = ( +2683 exp.Literal.number(value) +2684 if self._prev.token_type == TokenType.NUMBER +2685 else exp.Literal.string(value) +2686 ) +2687 else: +2688 field = self._parse_star() or self._parse_function() or self._parse_id_var() +2689 +2690 if isinstance(field, exp.Func): +2691 # bigquery allows function calls like x.y.count(...) +2692 # SAFE.SUBSTR(...) +2693 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules +2694 this = self._replace_columns_with_dots(this) 2695 -2696 if query: -2697 expressions = [query] -2698 else: -2699 expressions = self._parse_csv( -2700 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) -2701 ) -2702 -2703 this = seq_get(expressions, 0) -2704 self._parse_query_modifiers(this) -2705 self._match_r_paren() -2706 -2707 if isinstance(this, exp.Subqueryable): -2708 this = self._parse_set_operations( -2709 self._parse_subquery(this=this, parse_alias=False) -2710 ) -2711 elif len(expressions) > 1: -2712 this = self.expression(exp.Tuple, expressions=expressions) -2713 else: -2714 this = self.expression(exp.Paren, this=this) -2715 -2716 if this and comments: -2717 this.comments = comments -2718 -2719 return this -2720 -2721 return None -2722 -2723 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: -2724 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) -2725 -2726 def _parse_function( -2727 self, functions: t.Optional[t.Dict[str, t.Callable]] = None -2728 ) -> t.Optional[exp.Expression]: -2729 if not self._curr: -2730 return None +2696 if op: +2697 this = op(self, this, field) +2698 elif isinstance(this, exp.Column) and not this.args.get("catalog"): +2699 this = self.expression( +2700 exp.Column, +2701 this=field, +2702 table=this.this, +2703 db=this.args.get("table"), +2704 catalog=this.args.get("db"), +2705 ) +2706 else: +2707 this = self.expression(exp.Dot, this=this, expression=field) +2708 this = self._parse_bracket(this) +2709 +2710 return this +2711 +2712 def _parse_primary(self) -> t.Optional[exp.Expression]: +2713 if self._match_set(self.PRIMARY_PARSERS): +2714 token_type = self._prev.token_type +2715 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) +2716 +2717 if token_type == TokenType.STRING: +2718 expressions = [primary] +2719 while self._match(TokenType.STRING): +2720 expressions.append(exp.Literal.string(self._prev.text)) +2721 if len(expressions) > 1: +2722 return self.expression(exp.Concat, expressions=expressions) +2723 return primary +2724 +2725 if self._match_pair(TokenType.DOT, TokenType.NUMBER): +2726 return exp.Literal.number(f"0.{self._prev.text}") +2727 +2728 if self._match(TokenType.L_PAREN): +2729 comments = self._prev_comments +2730 query = self._parse_select() 2731 -2732 token_type = self._curr.token_type -2733 -2734 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): -2735 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) -2736 -2737 if not self._next or self._next.token_type != TokenType.L_PAREN: -2738 if token_type in self.NO_PAREN_FUNCTIONS: -2739 self._advance() -2740 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) -2741 -2742 return None -2743 -2744 if token_type not in self.FUNC_TOKENS: -2745 return None -2746 -2747 this = self._curr.text -2748 upper = this.upper() -2749 self._advance(2) -2750 -2751 parser = self.FUNCTION_PARSERS.get(upper) -2752 -2753 if parser: -2754 this = parser(self) -2755 else: -2756 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) -2757 -2758 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): -2759 this = self.expression(subquery_predicate, this=self._parse_select()) -2760 self._match_r_paren() -2761 return this -2762 -2763 if functions is None: -2764 functions = self.FUNCTIONS -2765 -2766 function = functions.get(upper) -2767 args = self._parse_csv(self._parse_lambda) -2768 -2769 if function: -2770 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the -2771 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. -2772 if count_params(function) == 2: -2773 params = None -2774 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): -2775 params = self._parse_csv(self._parse_lambda) -2776 -2777 this = function(args, params) -2778 else: -2779 this = function(args) -2780 -2781 self.validate_expression(this, args) -2782 else: -2783 this = self.expression(exp.Anonymous, this=this, expressions=args) -2784 -2785 self._match_r_paren(this) -2786 return self._parse_window(this) -2787 -2788 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: -2789 return self._parse_column_def(self._parse_id_var()) -2790 -2791 def _parse_user_defined_function( -2792 self, kind: t.Optional[TokenType] = None -2793 ) -> t.Optional[exp.Expression]: -2794 this = self._parse_id_var() -2795 -2796 while self._match(TokenType.DOT): -2797 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) +2732 if query: +2733 expressions = [query] +2734 else: +2735 expressions = self._parse_csv( +2736 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) +2737 ) +2738 +2739 this = seq_get(expressions, 0) +2740 self._parse_query_modifiers(this) +2741 self._match_r_paren() +2742 +2743 if isinstance(this, exp.Subqueryable): +2744 this = self._parse_set_operations( +2745 self._parse_subquery(this=this, parse_alias=False) +2746 ) +2747 elif len(expressions) > 1: +2748 this = self.expression(exp.Tuple, expressions=expressions) +2749 else: +2750 this = self.expression(exp.Paren, this=this) +2751 +2752 if this and comments: +2753 this.comments = comments +2754 +2755 return this +2756 +2757 return None +2758 +2759 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: +2760 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) +2761 +2762 def _parse_function( +2763 self, functions: t.Optional[t.Dict[str, t.Callable]] = None +2764 ) -> t.Optional[exp.Expression]: +2765 if not self._curr: +2766 return None +2767 +2768 token_type = self._curr.token_type +2769 +2770 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): +2771 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) +2772 +2773 if not self._next or self._next.token_type != TokenType.L_PAREN: +2774 if token_type in self.NO_PAREN_FUNCTIONS: +2775 self._advance() +2776 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) +2777 +2778 return None +2779 +2780 if token_type not in self.FUNC_TOKENS: +2781 return None +2782 +2783 this = self._curr.text +2784 upper = this.upper() +2785 self._advance(2) +2786 +2787 parser = self.FUNCTION_PARSERS.get(upper) +2788 +2789 if parser: +2790 this = parser(self) +2791 else: +2792 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) +2793 +2794 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): +2795 this = self.expression(subquery_predicate, this=self._parse_select()) +2796 self._match_r_paren() +2797 return this 2798 -2799 if not self._match(TokenType.L_PAREN): -2800 return this +2799 if functions is None: +2800 functions = self.FUNCTIONS 2801 -2802 expressions = self._parse_csv(self._parse_function_parameter) -2803 self._match_r_paren() -2804 return self.expression( -2805 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True -2806 ) -2807 -2808 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: -2809 literal = self._parse_primary() -2810 if literal: -2811 return self.expression(exp.Introducer, this=token.text, expression=literal) +2802 function = functions.get(upper) +2803 args = self._parse_csv(self._parse_lambda) +2804 +2805 if function: +2806 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the +2807 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. +2808 if count_params(function) == 2: +2809 params = None +2810 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): +2811 params = self._parse_csv(self._parse_lambda) 2812 -2813 return self.expression(exp.Identifier, this=token.text) -2814 -2815 def _parse_national(self, token: Token) -> exp.Expression: -2816 return self.expression(exp.National, this=exp.Literal.string(token.text)) -2817 -2818 def _parse_session_parameter(self) -> exp.Expression: -2819 kind = None -2820 this = self._parse_id_var() or self._parse_primary() -2821 -2822 if this and self._match(TokenType.DOT): -2823 kind = this.name -2824 this = self._parse_var() or self._parse_primary() -2825 -2826 return self.expression(exp.SessionParameter, this=this, kind=kind) -2827 -2828 def _parse_lambda(self) -> t.Optional[exp.Expression]: -2829 index = self._index -2830 -2831 if self._match(TokenType.L_PAREN): -2832 expressions = self._parse_csv(self._parse_id_var) -2833 -2834 if not self._match(TokenType.R_PAREN): -2835 self._retreat(index) -2836 else: -2837 expressions = [self._parse_id_var()] -2838 -2839 if self._match_set(self.LAMBDAS): -2840 return self.LAMBDAS[self._prev.token_type](self, expressions) -2841 -2842 self._retreat(index) +2813 this = function(args, params) +2814 else: +2815 this = function(args) +2816 +2817 self.validate_expression(this, args) +2818 else: +2819 this = self.expression(exp.Anonymous, this=this, expressions=args) +2820 +2821 self._match_r_paren(this) +2822 return self._parse_window(this) +2823 +2824 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: +2825 return self._parse_column_def(self._parse_id_var()) +2826 +2827 def _parse_user_defined_function( +2828 self, kind: t.Optional[TokenType] = None +2829 ) -> t.Optional[exp.Expression]: +2830 this = self._parse_id_var() +2831 +2832 while self._match(TokenType.DOT): +2833 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) +2834 +2835 if not self._match(TokenType.L_PAREN): +2836 return this +2837 +2838 expressions = self._parse_csv(self._parse_function_parameter) +2839 self._match_r_paren() +2840 return self.expression( +2841 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True +2842 ) 2843 -2844 this: t.Optional[exp.Expression] -2845 -2846 if self._match(TokenType.DISTINCT): -2847 this = self.expression( -2848 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) -2849 ) -2850 else: -2851 this = self._parse_select_or_expression() -2852 -2853 if self._match(TokenType.IGNORE_NULLS): -2854 this = self.expression(exp.IgnoreNulls, this=this) -2855 else: -2856 self._match(TokenType.RESPECT_NULLS) +2844 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: +2845 literal = self._parse_primary() +2846 if literal: +2847 return self.expression(exp.Introducer, this=token.text, expression=literal) +2848 +2849 return self.expression(exp.Identifier, this=token.text) +2850 +2851 def _parse_national(self, token: Token) -> exp.Expression: +2852 return self.expression(exp.National, this=exp.Literal.string(token.text)) +2853 +2854 def _parse_session_parameter(self) -> exp.Expression: +2855 kind = None +2856 this = self._parse_id_var() or self._parse_primary() 2857 -2858 return self._parse_limit(self._parse_order(this)) -2859 -2860 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: -2861 index = self._index -2862 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): -2863 self._retreat(index) -2864 return this -2865 -2866 args = self._parse_csv( -2867 lambda: self._parse_constraint() -2868 or self._parse_column_def(self._parse_field(any_token=True)) -2869 ) -2870 self._match_r_paren() -2871 return self.expression(exp.Schema, this=this, expressions=args) -2872 -2873 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2874 kind = self._parse_types() -2875 -2876 if self._match_text_seq("FOR", "ORDINALITY"): -2877 return self.expression(exp.ColumnDef, this=this, ordinality=True) -2878 -2879 constraints = [] -2880 while True: -2881 constraint = self._parse_column_constraint() -2882 if not constraint: -2883 break -2884 constraints.append(constraint) -2885 -2886 if not kind and not constraints: -2887 return this +2858 if this and self._match(TokenType.DOT): +2859 kind = this.name +2860 this = self._parse_var() or self._parse_primary() +2861 +2862 return self.expression(exp.SessionParameter, this=this, kind=kind) +2863 +2864 def _parse_lambda(self) -> t.Optional[exp.Expression]: +2865 index = self._index +2866 +2867 if self._match(TokenType.L_PAREN): +2868 expressions = self._parse_csv(self._parse_id_var) +2869 +2870 if not self._match(TokenType.R_PAREN): +2871 self._retreat(index) +2872 else: +2873 expressions = [self._parse_id_var()] +2874 +2875 if self._match_set(self.LAMBDAS): +2876 return self.LAMBDAS[self._prev.token_type](self, expressions) +2877 +2878 self._retreat(index) +2879 +2880 this: t.Optional[exp.Expression] +2881 +2882 if self._match(TokenType.DISTINCT): +2883 this = self.expression( +2884 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) +2885 ) +2886 else: +2887 this = self._parse_select_or_expression() 2888 -2889 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) -2890 -2891 def _parse_auto_increment(self) -> exp.Expression: -2892 start = None -2893 increment = None -2894 -2895 if self._match(TokenType.L_PAREN, advance=False): -2896 args = self._parse_wrapped_csv(self._parse_bitwise) -2897 start = seq_get(args, 0) -2898 increment = seq_get(args, 1) -2899 elif self._match_text_seq("START"): -2900 start = self._parse_bitwise() -2901 self._match_text_seq("INCREMENT") -2902 increment = self._parse_bitwise() -2903 -2904 if start and increment: -2905 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) -2906 -2907 return exp.AutoIncrementColumnConstraint() +2889 if self._match(TokenType.IGNORE_NULLS): +2890 this = self.expression(exp.IgnoreNulls, this=this) +2891 else: +2892 self._match(TokenType.RESPECT_NULLS) +2893 +2894 return self._parse_limit(self._parse_order(this)) +2895 +2896 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: +2897 index = self._index +2898 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): +2899 self._retreat(index) +2900 return this +2901 +2902 args = self._parse_csv( +2903 lambda: self._parse_constraint() +2904 or self._parse_column_def(self._parse_field(any_token=True)) +2905 ) +2906 self._match_r_paren() +2907 return self.expression(exp.Schema, this=this, expressions=args) 2908 -2909 def _parse_compress(self) -> exp.Expression: -2910 if self._match(TokenType.L_PAREN, advance=False): -2911 return self.expression( -2912 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) -2913 ) +2909 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2910 kind = self._parse_types() +2911 +2912 if self._match_text_seq("FOR", "ORDINALITY"): +2913 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2914 -2915 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) -2916 -2917 def _parse_generated_as_identity(self) -> exp.Expression: -2918 if self._match(TokenType.BY_DEFAULT): -2919 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) -2920 else: -2921 self._match_text_seq("ALWAYS") -2922 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) -2923 -2924 self._match_text_seq("AS", "IDENTITY") -2925 if self._match(TokenType.L_PAREN): -2926 if self._match_text_seq("START", "WITH"): -2927 this.set("start", self._parse_bitwise()) -2928 if self._match_text_seq("INCREMENT", "BY"): -2929 this.set("increment", self._parse_bitwise()) -2930 if self._match_text_seq("MINVALUE"): -2931 this.set("minvalue", self._parse_bitwise()) -2932 if self._match_text_seq("MAXVALUE"): -2933 this.set("maxvalue", self._parse_bitwise()) -2934 -2935 if self._match_text_seq("CYCLE"): -2936 this.set("cycle", True) -2937 elif self._match_text_seq("NO", "CYCLE"): -2938 this.set("cycle", False) +2915 constraints = [] +2916 while True: +2917 constraint = self._parse_column_constraint() +2918 if not constraint: +2919 break +2920 constraints.append(constraint) +2921 +2922 if not kind and not constraints: +2923 return this +2924 +2925 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) +2926 +2927 def _parse_auto_increment(self) -> exp.Expression: +2928 start = None +2929 increment = None +2930 +2931 if self._match(TokenType.L_PAREN, advance=False): +2932 args = self._parse_wrapped_csv(self._parse_bitwise) +2933 start = seq_get(args, 0) +2934 increment = seq_get(args, 1) +2935 elif self._match_text_seq("START"): +2936 start = self._parse_bitwise() +2937 self._match_text_seq("INCREMENT") +2938 increment = self._parse_bitwise() 2939 -2940 self._match_r_paren() -2941 -2942 return this -2943 -2944 def _parse_inline(self) -> t.Optional[exp.Expression]: -2945 self._match_text_seq("LENGTH") -2946 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) -2947 -2948 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: -2949 if self._match_text_seq("NULL"): -2950 return self.expression(exp.NotNullColumnConstraint) -2951 if self._match_text_seq("CASESPECIFIC"): -2952 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) -2953 return None -2954 -2955 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: -2956 this = self._parse_references() -2957 if this: -2958 return this +2940 if start and increment: +2941 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) +2942 +2943 return exp.AutoIncrementColumnConstraint() +2944 +2945 def _parse_compress(self) -> exp.Expression: +2946 if self._match(TokenType.L_PAREN, advance=False): +2947 return self.expression( +2948 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) +2949 ) +2950 +2951 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) +2952 +2953 def _parse_generated_as_identity(self) -> exp.Expression: +2954 if self._match(TokenType.BY_DEFAULT): +2955 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) +2956 else: +2957 self._match_text_seq("ALWAYS") +2958 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2959 -2960 if self._match(TokenType.CONSTRAINT): -2961 this = self._parse_id_var() -2962 -2963 if self._match_texts(self.CONSTRAINT_PARSERS): -2964 return self.expression( -2965 exp.ColumnConstraint, -2966 this=this, -2967 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), -2968 ) -2969 -2970 return this -2971 -2972 def _parse_constraint(self) -> t.Optional[exp.Expression]: -2973 if not self._match(TokenType.CONSTRAINT): -2974 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) +2960 self._match_text_seq("AS", "IDENTITY") +2961 if self._match(TokenType.L_PAREN): +2962 if self._match_text_seq("START", "WITH"): +2963 this.set("start", self._parse_bitwise()) +2964 if self._match_text_seq("INCREMENT", "BY"): +2965 this.set("increment", self._parse_bitwise()) +2966 if self._match_text_seq("MINVALUE"): +2967 this.set("minvalue", self._parse_bitwise()) +2968 if self._match_text_seq("MAXVALUE"): +2969 this.set("maxvalue", self._parse_bitwise()) +2970 +2971 if self._match_text_seq("CYCLE"): +2972 this.set("cycle", True) +2973 elif self._match_text_seq("NO", "CYCLE"): +2974 this.set("cycle", False) 2975 -2976 this = self._parse_id_var() -2977 expressions = [] -2978 -2979 while True: -2980 constraint = self._parse_unnamed_constraint() or self._parse_function() -2981 if not constraint: -2982 break -2983 expressions.append(constraint) -2984 -2985 return self.expression(exp.Constraint, this=this, expressions=expressions) -2986 -2987 def _parse_unnamed_constraint( -2988 self, constraints: t.Optional[t.Collection[str]] = None -2989 ) -> t.Optional[exp.Expression]: -2990 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): -2991 return None -2992 -2993 constraint = self._prev.text.upper() -2994 if constraint not in self.CONSTRAINT_PARSERS: -2995 self.raise_error(f"No parser found for schema constraint {constraint}.") -2996 -2997 return self.CONSTRAINT_PARSERS[constraint](self) +2976 self._match_r_paren() +2977 +2978 return this +2979 +2980 def _parse_inline(self) -> t.Optional[exp.Expression]: +2981 self._match_text_seq("LENGTH") +2982 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) +2983 +2984 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: +2985 if self._match_text_seq("NULL"): +2986 return self.expression(exp.NotNullColumnConstraint) +2987 if self._match_text_seq("CASESPECIFIC"): +2988 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) +2989 return None +2990 +2991 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: +2992 this = self._parse_references() +2993 if this: +2994 return this +2995 +2996 if self._match(TokenType.CONSTRAINT): +2997 this = self._parse_id_var() 2998 -2999 def _parse_unique(self) -> exp.Expression: -3000 if not self._match(TokenType.L_PAREN, advance=False): -3001 return self.expression(exp.UniqueColumnConstraint) -3002 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) -3003 -3004 def _parse_key_constraint_options(self) -> t.List[str]: -3005 options = [] -3006 while True: -3007 if not self._curr: -3008 break -3009 -3010 if self._match(TokenType.ON): -3011 action = None -3012 on = self._advance_any() and self._prev.text -3013 -3014 if self._match(TokenType.NO_ACTION): -3015 action = "NO ACTION" -3016 elif self._match(TokenType.CASCADE): -3017 action = "CASCADE" -3018 elif self._match_pair(TokenType.SET, TokenType.NULL): -3019 action = "SET NULL" -3020 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): -3021 action = "SET DEFAULT" -3022 else: -3023 self.raise_error("Invalid key constraint") -3024 -3025 options.append(f"ON {on} {action}") -3026 elif self._match_text_seq("NOT", "ENFORCED"): -3027 options.append("NOT ENFORCED") -3028 elif self._match_text_seq("DEFERRABLE"): -3029 options.append("DEFERRABLE") -3030 elif self._match_text_seq("INITIALLY", "DEFERRED"): -3031 options.append("INITIALLY DEFERRED") -3032 elif self._match_text_seq("NORELY"): -3033 options.append("NORELY") -3034 elif self._match_text_seq("MATCH", "FULL"): -3035 options.append("MATCH FULL") -3036 else: -3037 break -3038 -3039 return options -3040 -3041 def _parse_references(self) -> t.Optional[exp.Expression]: -3042 if not self._match(TokenType.REFERENCES): -3043 return None -3044 -3045 expressions = None -3046 this = self._parse_id_var() -3047 -3048 if self._match(TokenType.L_PAREN, advance=False): -3049 expressions = self._parse_wrapped_id_vars() -3050 -3051 options = self._parse_key_constraint_options() -3052 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) -3053 -3054 def _parse_foreign_key(self) -> exp.Expression: -3055 expressions = self._parse_wrapped_id_vars() -3056 reference = self._parse_references() -3057 options = {} -3058 -3059 while self._match(TokenType.ON): -3060 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): -3061 self.raise_error("Expected DELETE or UPDATE") -3062 -3063 kind = self._prev.text.lower() -3064 -3065 if self._match(TokenType.NO_ACTION): -3066 action = "NO ACTION" -3067 elif self._match(TokenType.SET): -3068 self._match_set((TokenType.NULL, TokenType.DEFAULT)) -3069 action = "SET " + self._prev.text.upper() -3070 else: -3071 self._advance() -3072 action = self._prev.text.upper() -3073 -3074 options[kind] = action -3075 -3076 return self.expression( -3077 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore -3078 ) -3079 -3080 def _parse_primary_key(self) -> exp.Expression: -3081 desc = ( -3082 self._match_set((TokenType.ASC, TokenType.DESC)) -3083 and self._prev.token_type == TokenType.DESC -3084 ) -3085 -3086 if not self._match(TokenType.L_PAREN, advance=False): -3087 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) -3088 -3089 expressions = self._parse_wrapped_id_vars() -3090 options = self._parse_key_constraint_options() -3091 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) -3092 -3093 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -3094 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): -3095 return this -3096 -3097 bracket_kind = self._prev.token_type -3098 expressions: t.List[t.Optional[exp.Expression]] -3099 -3100 if self._match(TokenType.COLON): -3101 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] -3102 else: -3103 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) -3104 -3105 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs -3106 if bracket_kind == TokenType.L_BRACE: -3107 this = self.expression(exp.Struct, expressions=expressions) -3108 elif not this or this.name.upper() == "ARRAY": -3109 this = self.expression(exp.Array, expressions=expressions) -3110 else: -3111 expressions = apply_index_offset(expressions, -self.index_offset) -3112 this = self.expression(exp.Bracket, this=this, expressions=expressions) -3113 -3114 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: -3115 self.raise_error("Expected ]") -3116 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: -3117 self.raise_error("Expected }") -3118 -3119 this.comments = self._prev_comments -3120 return self._parse_bracket(this) +2999 if self._match_texts(self.CONSTRAINT_PARSERS): +3000 return self.expression( +3001 exp.ColumnConstraint, +3002 this=this, +3003 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), +3004 ) +3005 +3006 return this +3007 +3008 def _parse_constraint(self) -> t.Optional[exp.Expression]: +3009 if not self._match(TokenType.CONSTRAINT): +3010 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) +3011 +3012 this = self._parse_id_var() +3013 expressions = [] +3014 +3015 while True: +3016 constraint = self._parse_unnamed_constraint() or self._parse_function() +3017 if not constraint: +3018 break +3019 expressions.append(constraint) +3020 +3021 return self.expression(exp.Constraint, this=this, expressions=expressions) +3022 +3023 def _parse_unnamed_constraint( +3024 self, constraints: t.Optional[t.Collection[str]] = None +3025 ) -> t.Optional[exp.Expression]: +3026 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): +3027 return None +3028 +3029 constraint = self._prev.text.upper() +3030 if constraint not in self.CONSTRAINT_PARSERS: +3031 self.raise_error(f"No parser found for schema constraint {constraint}.") +3032 +3033 return self.CONSTRAINT_PARSERS[constraint](self) +3034 +3035 def _parse_unique(self) -> exp.Expression: +3036 if not self._match(TokenType.L_PAREN, advance=False): +3037 return self.expression(exp.UniqueColumnConstraint) +3038 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) +3039 +3040 def _parse_key_constraint_options(self) -> t.List[str]: +3041 options = [] +3042 while True: +3043 if not self._curr: +3044 break +3045 +3046 if self._match(TokenType.ON): +3047 action = None +3048 on = self._advance_any() and self._prev.text +3049 +3050 if self._match(TokenType.NO_ACTION): +3051 action = "NO ACTION" +3052 elif self._match(TokenType.CASCADE): +3053 action = "CASCADE" +3054 elif self._match_pair(TokenType.SET, TokenType.NULL): +3055 action = "SET NULL" +3056 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): +3057 action = "SET DEFAULT" +3058 else: +3059 self.raise_error("Invalid key constraint") +3060 +3061 options.append(f"ON {on} {action}") +3062 elif self._match_text_seq("NOT", "ENFORCED"): +3063 options.append("NOT ENFORCED") +3064 elif self._match_text_seq("DEFERRABLE"): +3065 options.append("DEFERRABLE") +3066 elif self._match_text_seq("INITIALLY", "DEFERRED"): +3067 options.append("INITIALLY DEFERRED") +3068 elif self._match_text_seq("NORELY"): +3069 options.append("NORELY") +3070 elif self._match_text_seq("MATCH", "FULL"): +3071 options.append("MATCH FULL") +3072 else: +3073 break +3074 +3075 return options +3076 +3077 def _parse_references(self) -> t.Optional[exp.Expression]: +3078 if not self._match(TokenType.REFERENCES): +3079 return None +3080 +3081 expressions = None +3082 this = self._parse_id_var() +3083 +3084 if self._match(TokenType.L_PAREN, advance=False): +3085 expressions = self._parse_wrapped_id_vars() +3086 +3087 options = self._parse_key_constraint_options() +3088 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) +3089 +3090 def _parse_foreign_key(self) -> exp.Expression: +3091 expressions = self._parse_wrapped_id_vars() +3092 reference = self._parse_references() +3093 options = {} +3094 +3095 while self._match(TokenType.ON): +3096 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): +3097 self.raise_error("Expected DELETE or UPDATE") +3098 +3099 kind = self._prev.text.lower() +3100 +3101 if self._match(TokenType.NO_ACTION): +3102 action = "NO ACTION" +3103 elif self._match(TokenType.SET): +3104 self._match_set((TokenType.NULL, TokenType.DEFAULT)) +3105 action = "SET " + self._prev.text.upper() +3106 else: +3107 self._advance() +3108 action = self._prev.text.upper() +3109 +3110 options[kind] = action +3111 +3112 return self.expression( +3113 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore +3114 ) +3115 +3116 def _parse_primary_key(self) -> exp.Expression: +3117 desc = ( +3118 self._match_set((TokenType.ASC, TokenType.DESC)) +3119 and self._prev.token_type == TokenType.DESC +3120 ) 3121 -3122 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -3123 if self._match(TokenType.COLON): -3124 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) -3125 return this -3126 -3127 def _parse_case(self) -> t.Optional[exp.Expression]: -3128 ifs = [] -3129 default = None -3130 -3131 expression = self._parse_conjunction() +3122 if not self._match(TokenType.L_PAREN, advance=False): +3123 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) +3124 +3125 expressions = self._parse_wrapped_id_vars() +3126 options = self._parse_key_constraint_options() +3127 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) +3128 +3129 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +3130 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): +3131 return this 3132 -3133 while self._match(TokenType.WHEN): -3134 this = self._parse_conjunction() -3135 self._match(TokenType.THEN) -3136 then = self._parse_conjunction() -3137 ifs.append(self.expression(exp.If, this=this, true=then)) -3138 -3139 if self._match(TokenType.ELSE): -3140 default = self._parse_conjunction() -3141 -3142 if not self._match(TokenType.END): -3143 self.raise_error("Expected END after CASE", self._prev) -3144 -3145 return self._parse_window( -3146 self.expression(exp.Case, this=expression, ifs=ifs, default=default) -3147 ) -3148 -3149 def _parse_if(self) -> t.Optional[exp.Expression]: -3150 if self._match(TokenType.L_PAREN): -3151 args = self._parse_csv(self._parse_conjunction) -3152 this = exp.If.from_arg_list(args) -3153 self.validate_expression(this, args) -3154 self._match_r_paren() -3155 else: -3156 condition = self._parse_conjunction() -3157 self._match(TokenType.THEN) -3158 true = self._parse_conjunction() -3159 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None -3160 self._match(TokenType.END) -3161 this = self.expression(exp.If, this=condition, true=true, false=false) +3133 bracket_kind = self._prev.token_type +3134 expressions: t.List[t.Optional[exp.Expression]] +3135 +3136 if self._match(TokenType.COLON): +3137 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] +3138 else: +3139 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) +3140 +3141 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs +3142 if bracket_kind == TokenType.L_BRACE: +3143 this = self.expression(exp.Struct, expressions=expressions) +3144 elif not this or this.name.upper() == "ARRAY": +3145 this = self.expression(exp.Array, expressions=expressions) +3146 else: +3147 expressions = apply_index_offset(expressions, -self.index_offset) +3148 this = self.expression(exp.Bracket, this=this, expressions=expressions) +3149 +3150 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: +3151 self.raise_error("Expected ]") +3152 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: +3153 self.raise_error("Expected }") +3154 +3155 this.comments = self._prev_comments +3156 return self._parse_bracket(this) +3157 +3158 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +3159 if self._match(TokenType.COLON): +3160 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) +3161 return this 3162 -3163 return self._parse_window(this) -3164 -3165 def _parse_extract(self) -> exp.Expression: -3166 this = self._parse_function() or self._parse_var() or self._parse_type() -3167 -3168 if self._match(TokenType.FROM): -3169 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) -3170 -3171 if not self._match(TokenType.COMMA): -3172 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) -3173 -3174 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) -3175 -3176 def _parse_cast(self, strict: bool) -> exp.Expression: -3177 this = self._parse_conjunction() -3178 -3179 if not self._match(TokenType.ALIAS): -3180 self.raise_error("Expected AS after CAST") -3181 -3182 to = self._parse_types() -3183 -3184 if not to: -3185 self.raise_error("Expected TYPE after CAST") -3186 elif to.this == exp.DataType.Type.CHAR: -3187 if self._match(TokenType.CHARACTER_SET): -3188 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) -3189 -3190 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) -3191 -3192 def _parse_string_agg(self) -> exp.Expression: -3193 expression: t.Optional[exp.Expression] -3194 -3195 if self._match(TokenType.DISTINCT): -3196 args = self._parse_csv(self._parse_conjunction) -3197 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) -3198 else: -3199 args = self._parse_csv(self._parse_conjunction) -3200 expression = seq_get(args, 0) -3201 -3202 index = self._index -3203 if not self._match(TokenType.R_PAREN): -3204 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) -3205 order = self._parse_order(this=expression) -3206 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) -3207 -3208 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). -3209 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that -3210 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. -3211 if not self._match(TokenType.WITHIN_GROUP): -3212 self._retreat(index) -3213 this = exp.GroupConcat.from_arg_list(args) -3214 self.validate_expression(this, args) -3215 return this -3216 -3217 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) -3218 order = self._parse_order(this=expression) -3219 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) -3220 -3221 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: -3222 to: t.Optional[exp.Expression] -3223 this = self._parse_column() -3224 -3225 if self._match(TokenType.USING): -3226 to = self.expression(exp.CharacterSet, this=self._parse_var()) -3227 elif self._match(TokenType.COMMA): -3228 to = self._parse_types() -3229 else: -3230 to = None -3231 -3232 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) -3233 -3234 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: -3235 args = self._parse_csv(self._parse_bitwise) -3236 -3237 if self._match(TokenType.IN): -3238 return self.expression( -3239 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) -3240 ) -3241 -3242 if haystack_first: -3243 haystack = seq_get(args, 0) -3244 needle = seq_get(args, 1) -3245 else: -3246 needle = seq_get(args, 0) -3247 haystack = seq_get(args, 1) -3248 -3249 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) -3250 -3251 self.validate_expression(this, args) +3163 def _parse_case(self) -> t.Optional[exp.Expression]: +3164 ifs = [] +3165 default = None +3166 +3167 expression = self._parse_conjunction() +3168 +3169 while self._match(TokenType.WHEN): +3170 this = self._parse_conjunction() +3171 self._match(TokenType.THEN) +3172 then = self._parse_conjunction() +3173 ifs.append(self.expression(exp.If, this=this, true=then)) +3174 +3175 if self._match(TokenType.ELSE): +3176 default = self._parse_conjunction() +3177 +3178 if not self._match(TokenType.END): +3179 self.raise_error("Expected END after CASE", self._prev) +3180 +3181 return self._parse_window( +3182 self.expression(exp.Case, this=expression, ifs=ifs, default=default) +3183 ) +3184 +3185 def _parse_if(self) -> t.Optional[exp.Expression]: +3186 if self._match(TokenType.L_PAREN): +3187 args = self._parse_csv(self._parse_conjunction) +3188 this = exp.If.from_arg_list(args) +3189 self.validate_expression(this, args) +3190 self._match_r_paren() +3191 else: +3192 condition = self._parse_conjunction() +3193 self._match(TokenType.THEN) +3194 true = self._parse_conjunction() +3195 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None +3196 self._match(TokenType.END) +3197 this = self.expression(exp.If, this=condition, true=true, false=false) +3198 +3199 return self._parse_window(this) +3200 +3201 def _parse_extract(self) -> exp.Expression: +3202 this = self._parse_function() or self._parse_var() or self._parse_type() +3203 +3204 if self._match(TokenType.FROM): +3205 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) +3206 +3207 if not self._match(TokenType.COMMA): +3208 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) +3209 +3210 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) +3211 +3212 def _parse_cast(self, strict: bool) -> exp.Expression: +3213 this = self._parse_conjunction() +3214 +3215 if not self._match(TokenType.ALIAS): +3216 self.raise_error("Expected AS after CAST") +3217 +3218 to = self._parse_types() +3219 +3220 if not to: +3221 self.raise_error("Expected TYPE after CAST") +3222 elif to.this == exp.DataType.Type.CHAR: +3223 if self._match(TokenType.CHARACTER_SET): +3224 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) +3225 +3226 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) +3227 +3228 def _parse_string_agg(self) -> exp.Expression: +3229 expression: t.Optional[exp.Expression] +3230 +3231 if self._match(TokenType.DISTINCT): +3232 args = self._parse_csv(self._parse_conjunction) +3233 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) +3234 else: +3235 args = self._parse_csv(self._parse_conjunction) +3236 expression = seq_get(args, 0) +3237 +3238 index = self._index +3239 if not self._match(TokenType.R_PAREN): +3240 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) +3241 order = self._parse_order(this=expression) +3242 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) +3243 +3244 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). +3245 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that +3246 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. +3247 if not self._match(TokenType.WITHIN_GROUP): +3248 self._retreat(index) +3249 this = exp.GroupConcat.from_arg_list(args) +3250 self.validate_expression(this, args) +3251 return this 3252 -3253 return this -3254 -3255 def _parse_join_hint(self, func_name: str) -> exp.Expression: -3256 args = self._parse_csv(self._parse_table) -3257 return exp.JoinHint(this=func_name.upper(), expressions=args) -3258 -3259 def _parse_substring(self) -> exp.Expression: -3260 # Postgres supports the form: substring(string [from int] [for int]) -3261 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 -3262 -3263 args = self._parse_csv(self._parse_bitwise) -3264 -3265 if self._match(TokenType.FROM): -3266 args.append(self._parse_bitwise()) -3267 if self._match(TokenType.FOR): -3268 args.append(self._parse_bitwise()) +3253 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) +3254 order = self._parse_order(this=expression) +3255 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) +3256 +3257 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: +3258 to: t.Optional[exp.Expression] +3259 this = self._parse_column() +3260 +3261 if self._match(TokenType.USING): +3262 to = self.expression(exp.CharacterSet, this=self._parse_var()) +3263 elif self._match(TokenType.COMMA): +3264 to = self._parse_types() +3265 else: +3266 to = None +3267 +3268 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3269 -3270 this = exp.Substring.from_arg_list(args) -3271 self.validate_expression(this, args) +3270 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: +3271 args = self._parse_csv(self._parse_bitwise) 3272 -3273 return this -3274 -3275 def _parse_trim(self) -> exp.Expression: -3276 # https://www.w3resource.com/sql/character-functions/trim.php -3277 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html -3278 -3279 position = None -3280 collation = None -3281 -3282 if self._match_set(self.TRIM_TYPES): -3283 position = self._prev.text.upper() +3273 if self._match(TokenType.IN): +3274 return self.expression( +3275 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) +3276 ) +3277 +3278 if haystack_first: +3279 haystack = seq_get(args, 0) +3280 needle = seq_get(args, 1) +3281 else: +3282 needle = seq_get(args, 0) +3283 haystack = seq_get(args, 1) 3284 -3285 expression = self._parse_term() -3286 if self._match_set((TokenType.FROM, TokenType.COMMA)): -3287 this = self._parse_term() -3288 else: -3289 this = expression -3290 expression = None -3291 -3292 if self._match(TokenType.COLLATE): -3293 collation = self._parse_term() +3285 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) +3286 +3287 self.validate_expression(this, args) +3288 +3289 return this +3290 +3291 def _parse_join_hint(self, func_name: str) -> exp.Expression: +3292 args = self._parse_csv(self._parse_table) +3293 return exp.JoinHint(this=func_name.upper(), expressions=args) 3294 -3295 return self.expression( -3296 exp.Trim, -3297 this=this, -3298 position=position, -3299 expression=expression, -3300 collation=collation, -3301 ) -3302 -3303 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3304 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) +3295 def _parse_substring(self) -> exp.Expression: +3296 # Postgres supports the form: substring(string [from int] [for int]) +3297 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 +3298 +3299 args = self._parse_csv(self._parse_bitwise) +3300 +3301 if self._match(TokenType.FROM): +3302 args.append(self._parse_bitwise()) +3303 if self._match(TokenType.FOR): +3304 args.append(self._parse_bitwise()) 3305 -3306 def _parse_named_window(self) -> t.Optional[exp.Expression]: -3307 return self._parse_window(self._parse_id_var(), alias=True) +3306 this = exp.Substring.from_arg_list(args) +3307 self.validate_expression(this, args) 3308 -3309 def _parse_window( -3310 self, this: t.Optional[exp.Expression], alias: bool = False -3311 ) -> t.Optional[exp.Expression]: -3312 if self._match(TokenType.FILTER): -3313 where = self._parse_wrapped(self._parse_where) -3314 this = self.expression(exp.Filter, this=this, expression=where) -3315 -3316 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. -3317 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 -3318 if self._match(TokenType.WITHIN_GROUP): -3319 order = self._parse_wrapped(self._parse_order) -3320 this = self.expression(exp.WithinGroup, this=this, expression=order) -3321 -3322 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER -3323 # Some dialects choose to implement and some do not. -3324 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html -3325 -3326 # There is some code above in _parse_lambda that handles -3327 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... -3328 -3329 # The below changes handle -3330 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... -3331 -3332 # Oracle allows both formats -3333 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) -3334 # and Snowflake chose to do the same for familiarity -3335 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes -3336 if self._match(TokenType.IGNORE_NULLS): -3337 this = self.expression(exp.IgnoreNulls, this=this) -3338 elif self._match(TokenType.RESPECT_NULLS): -3339 this = self.expression(exp.RespectNulls, this=this) -3340 -3341 # bigquery select from window x AS (partition by ...) -3342 if alias: -3343 self._match(TokenType.ALIAS) -3344 elif not self._match(TokenType.OVER): -3345 return this -3346 -3347 if not self._match(TokenType.L_PAREN): -3348 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) -3349 -3350 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) -3351 partition = self._parse_partition_by() -3352 order = self._parse_order() -3353 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text -3354 -3355 if kind: -3356 self._match(TokenType.BETWEEN) -3357 start = self._parse_window_spec() -3358 self._match(TokenType.AND) -3359 end = self._parse_window_spec() -3360 -3361 spec = self.expression( -3362 exp.WindowSpec, -3363 kind=kind, -3364 start=start["value"], -3365 start_side=start["side"], -3366 end=end["value"], -3367 end_side=end["side"], -3368 ) -3369 else: -3370 spec = None -3371 -3372 self._match_r_paren() -3373 -3374 return self.expression( -3375 exp.Window, -3376 this=this, -3377 partition_by=partition, -3378 order=order, -3379 spec=spec, -3380 alias=window_alias, -3381 ) +3309 return this +3310 +3311 def _parse_trim(self) -> exp.Expression: +3312 # https://www.w3resource.com/sql/character-functions/trim.php +3313 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html +3314 +3315 position = None +3316 collation = None +3317 +3318 if self._match_set(self.TRIM_TYPES): +3319 position = self._prev.text.upper() +3320 +3321 expression = self._parse_term() +3322 if self._match_set((TokenType.FROM, TokenType.COMMA)): +3323 this = self._parse_term() +3324 else: +3325 this = expression +3326 expression = None +3327 +3328 if self._match(TokenType.COLLATE): +3329 collation = self._parse_term() +3330 +3331 return self.expression( +3332 exp.Trim, +3333 this=this, +3334 position=position, +3335 expression=expression, +3336 collation=collation, +3337 ) +3338 +3339 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3340 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) +3341 +3342 def _parse_named_window(self) -> t.Optional[exp.Expression]: +3343 return self._parse_window(self._parse_id_var(), alias=True) +3344 +3345 def _parse_window( +3346 self, this: t.Optional[exp.Expression], alias: bool = False +3347 ) -> t.Optional[exp.Expression]: +3348 if self._match(TokenType.FILTER): +3349 where = self._parse_wrapped(self._parse_where) +3350 this = self.expression(exp.Filter, this=this, expression=where) +3351 +3352 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. +3353 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 +3354 if self._match(TokenType.WITHIN_GROUP): +3355 order = self._parse_wrapped(self._parse_order) +3356 this = self.expression(exp.WithinGroup, this=this, expression=order) +3357 +3358 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER +3359 # Some dialects choose to implement and some do not. +3360 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html +3361 +3362 # There is some code above in _parse_lambda that handles +3363 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... +3364 +3365 # The below changes handle +3366 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... +3367 +3368 # Oracle allows both formats +3369 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) +3370 # and Snowflake chose to do the same for familiarity +3371 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes +3372 if self._match(TokenType.IGNORE_NULLS): +3373 this = self.expression(exp.IgnoreNulls, this=this) +3374 elif self._match(TokenType.RESPECT_NULLS): +3375 this = self.expression(exp.RespectNulls, this=this) +3376 +3377 # bigquery select from window x AS (partition by ...) +3378 if alias: +3379 self._match(TokenType.ALIAS) +3380 elif not self._match(TokenType.OVER): +3381 return this 3382 -3383 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: -3384 self._match(TokenType.BETWEEN) +3383 if not self._match(TokenType.L_PAREN): +3384 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3385 -3386 return { -3387 "value": ( -3388 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text -3389 ) -3390 or self._parse_bitwise(), -3391 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, -3392 } -3393 -3394 def _parse_alias( -3395 self, this: t.Optional[exp.Expression], explicit: bool = False -3396 ) -> t.Optional[exp.Expression]: -3397 any_token = self._match(TokenType.ALIAS) -3398 -3399 if explicit and not any_token: -3400 return this -3401 -3402 if self._match(TokenType.L_PAREN): -3403 aliases = self.expression( -3404 exp.Aliases, -3405 this=this, -3406 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), -3407 ) -3408 self._match_r_paren(aliases) -3409 return aliases -3410 -3411 alias = self._parse_id_var(any_token) -3412 -3413 if alias: -3414 return self.expression(exp.Alias, this=this, alias=alias) -3415 -3416 return this -3417 -3418 def _parse_id_var( -3419 self, -3420 any_token: bool = True, -3421 tokens: t.Optional[t.Collection[TokenType]] = None, -3422 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, -3423 ) -> t.Optional[exp.Expression]: -3424 identifier = self._parse_identifier() -3425 -3426 if identifier: -3427 return identifier -3428 -3429 prefix = "" -3430 -3431 if prefix_tokens: -3432 while self._match_set(prefix_tokens): -3433 prefix += self._prev.text +3386 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) +3387 partition = self._parse_partition_by() +3388 order = self._parse_order() +3389 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text +3390 +3391 if kind: +3392 self._match(TokenType.BETWEEN) +3393 start = self._parse_window_spec() +3394 self._match(TokenType.AND) +3395 end = self._parse_window_spec() +3396 +3397 spec = self.expression( +3398 exp.WindowSpec, +3399 kind=kind, +3400 start=start["value"], +3401 start_side=start["side"], +3402 end=end["value"], +3403 end_side=end["side"], +3404 ) +3405 else: +3406 spec = None +3407 +3408 self._match_r_paren() +3409 +3410 return self.expression( +3411 exp.Window, +3412 this=this, +3413 partition_by=partition, +3414 order=order, +3415 spec=spec, +3416 alias=window_alias, +3417 ) +3418 +3419 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: +3420 self._match(TokenType.BETWEEN) +3421 +3422 return { +3423 "value": ( +3424 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text +3425 ) +3426 or self._parse_bitwise(), +3427 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, +3428 } +3429 +3430 def _parse_alias( +3431 self, this: t.Optional[exp.Expression], explicit: bool = False +3432 ) -> t.Optional[exp.Expression]: +3433 any_token = self._match(TokenType.ALIAS) 3434 -3435 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): -3436 quoted = self._prev.token_type == TokenType.STRING -3437 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) -3438 -3439 return None -3440 -3441 def _parse_string(self) -> t.Optional[exp.Expression]: -3442 if self._match(TokenType.STRING): -3443 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) -3444 return self._parse_placeholder() -3445 -3446 def _parse_number(self) -> t.Optional[exp.Expression]: -3447 if self._match(TokenType.NUMBER): -3448 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) -3449 return self._parse_placeholder() -3450 -3451 def _parse_identifier(self) -> t.Optional[exp.Expression]: -3452 if self._match(TokenType.IDENTIFIER): -3453 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) -3454 return self._parse_placeholder() -3455 -3456 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: -3457 if (any_token and self._advance_any()) or self._match(TokenType.VAR): -3458 return self.expression(exp.Var, this=self._prev.text) -3459 return self._parse_placeholder() -3460 -3461 def _advance_any(self) -> t.Optional[Token]: -3462 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: -3463 self._advance() -3464 return self._prev -3465 return None +3435 if explicit and not any_token: +3436 return this +3437 +3438 if self._match(TokenType.L_PAREN): +3439 aliases = self.expression( +3440 exp.Aliases, +3441 this=this, +3442 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), +3443 ) +3444 self._match_r_paren(aliases) +3445 return aliases +3446 +3447 alias = self._parse_id_var(any_token) +3448 +3449 if alias: +3450 return self.expression(exp.Alias, this=this, alias=alias) +3451 +3452 return this +3453 +3454 def _parse_id_var( +3455 self, +3456 any_token: bool = True, +3457 tokens: t.Optional[t.Collection[TokenType]] = None, +3458 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, +3459 ) -> t.Optional[exp.Expression]: +3460 identifier = self._parse_identifier() +3461 +3462 if identifier: +3463 return identifier +3464 +3465 prefix = "" 3466 -3467 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: -3468 return self._parse_var() or self._parse_string() -3469 -3470 def _parse_null(self) -> t.Optional[exp.Expression]: -3471 if self._match(TokenType.NULL): -3472 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) -3473 return None +3467 if prefix_tokens: +3468 while self._match_set(prefix_tokens): +3469 prefix += self._prev.text +3470 +3471 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): +3472 quoted = self._prev.token_type == TokenType.STRING +3473 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3474 -3475 def _parse_boolean(self) -> t.Optional[exp.Expression]: -3476 if self._match(TokenType.TRUE): -3477 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) -3478 if self._match(TokenType.FALSE): -3479 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) -3480 return None +3475 return None +3476 +3477 def _parse_string(self) -> t.Optional[exp.Expression]: +3478 if self._match(TokenType.STRING): +3479 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) +3480 return self._parse_placeholder() 3481 -3482 def _parse_star(self) -> t.Optional[exp.Expression]: -3483 if self._match(TokenType.STAR): -3484 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) -3485 return None +3482 def _parse_number(self) -> t.Optional[exp.Expression]: +3483 if self._match(TokenType.NUMBER): +3484 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) +3485 return self._parse_placeholder() 3486 -3487 def _parse_parameter(self) -> exp.Expression: -3488 wrapped = self._match(TokenType.L_BRACE) -3489 this = self._parse_var() or self._parse_primary() -3490 self._match(TokenType.R_BRACE) -3491 return self.expression(exp.Parameter, this=this, wrapped=wrapped) -3492 -3493 def _parse_placeholder(self) -> t.Optional[exp.Expression]: -3494 if self._match_set(self.PLACEHOLDER_PARSERS): -3495 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) -3496 if placeholder: -3497 return placeholder -3498 self._advance(-1) -3499 return None -3500 -3501 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3502 if not self._match(TokenType.EXCEPT): -3503 return None -3504 if self._match(TokenType.L_PAREN, advance=False): -3505 return self._parse_wrapped_csv(self._parse_column) -3506 return self._parse_csv(self._parse_column) -3507 -3508 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3509 if not self._match(TokenType.REPLACE): -3510 return None -3511 if self._match(TokenType.L_PAREN, advance=False): -3512 return self._parse_wrapped_csv(self._parse_expression) -3513 return self._parse_csv(self._parse_expression) -3514 -3515 def _parse_csv( -3516 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA -3517 ) -> t.List[t.Optional[exp.Expression]]: -3518 parse_result = parse_method() -3519 items = [parse_result] if parse_result is not None else [] -3520 -3521 while self._match(sep): -3522 if parse_result and self._prev_comments: -3523 parse_result.comments = self._prev_comments -3524 -3525 parse_result = parse_method() -3526 if parse_result is not None: -3527 items.append(parse_result) +3487 def _parse_identifier(self) -> t.Optional[exp.Expression]: +3488 if self._match(TokenType.IDENTIFIER): +3489 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) +3490 return self._parse_placeholder() +3491 +3492 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: +3493 if (any_token and self._advance_any()) or self._match(TokenType.VAR): +3494 return self.expression(exp.Var, this=self._prev.text) +3495 return self._parse_placeholder() +3496 +3497 def _advance_any(self) -> t.Optional[Token]: +3498 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: +3499 self._advance() +3500 return self._prev +3501 return None +3502 +3503 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: +3504 return self._parse_var() or self._parse_string() +3505 +3506 def _parse_null(self) -> t.Optional[exp.Expression]: +3507 if self._match(TokenType.NULL): +3508 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) +3509 return None +3510 +3511 def _parse_boolean(self) -> t.Optional[exp.Expression]: +3512 if self._match(TokenType.TRUE): +3513 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) +3514 if self._match(TokenType.FALSE): +3515 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) +3516 return None +3517 +3518 def _parse_star(self) -> t.Optional[exp.Expression]: +3519 if self._match(TokenType.STAR): +3520 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) +3521 return None +3522 +3523 def _parse_parameter(self) -> exp.Expression: +3524 wrapped = self._match(TokenType.L_BRACE) +3525 this = self._parse_var() or self._parse_primary() +3526 self._match(TokenType.R_BRACE) +3527 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3528 -3529 return items -3530 -3531 def _parse_tokens( -3532 self, parse_method: t.Callable, expressions: t.Dict -3533 ) -> t.Optional[exp.Expression]: -3534 this = parse_method() -3535 -3536 while self._match_set(expressions): -3537 this = self.expression( -3538 expressions[self._prev.token_type], -3539 this=this, -3540 comments=self._prev_comments, -3541 expression=parse_method(), -3542 ) +3529 def _parse_placeholder(self) -> t.Optional[exp.Expression]: +3530 if self._match_set(self.PLACEHOLDER_PARSERS): +3531 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) +3532 if placeholder: +3533 return placeholder +3534 self._advance(-1) +3535 return None +3536 +3537 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3538 if not self._match(TokenType.EXCEPT): +3539 return None +3540 if self._match(TokenType.L_PAREN, advance=False): +3541 return self._parse_wrapped_csv(self._parse_column) +3542 return self._parse_csv(self._parse_column) 3543 -3544 return this -3545 -3546 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: -3547 return self._parse_wrapped_csv(self._parse_id_var) -3548 -3549 def _parse_wrapped_csv( -3550 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA -3551 ) -> t.List[t.Optional[exp.Expression]]: -3552 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) -3553 -3554 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: -3555 self._match_l_paren() -3556 parse_result = parse_method() -3557 self._match_r_paren() -3558 return parse_result -3559 -3560 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: -3561 return self._parse_select() or self._parse_expression() -3562 -3563 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: -3564 return self._parse_set_operations( -3565 self._parse_select(nested=True, parse_subquery_alias=False) -3566 ) -3567 -3568 def _parse_transaction(self) -> exp.Expression: -3569 this = None -3570 if self._match_texts(self.TRANSACTION_KIND): -3571 this = self._prev.text -3572 -3573 self._match_texts({"TRANSACTION", "WORK"}) -3574 -3575 modes = [] -3576 while True: -3577 mode = [] -3578 while self._match(TokenType.VAR): -3579 mode.append(self._prev.text) -3580 -3581 if mode: -3582 modes.append(" ".join(mode)) -3583 if not self._match(TokenType.COMMA): -3584 break -3585 -3586 return self.expression(exp.Transaction, this=this, modes=modes) -3587 -3588 def _parse_commit_or_rollback(self) -> exp.Expression: -3589 chain = None -3590 savepoint = None -3591 is_rollback = self._prev.token_type == TokenType.ROLLBACK -3592 -3593 self._match_texts({"TRANSACTION", "WORK"}) -3594 -3595 if self._match_text_seq("TO"): -3596 self._match_text_seq("SAVEPOINT") -3597 savepoint = self._parse_id_var() +3544 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3545 if not self._match(TokenType.REPLACE): +3546 return None +3547 if self._match(TokenType.L_PAREN, advance=False): +3548 return self._parse_wrapped_csv(self._parse_expression) +3549 return self._parse_csv(self._parse_expression) +3550 +3551 def _parse_csv( +3552 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA +3553 ) -> t.List[t.Optional[exp.Expression]]: +3554 parse_result = parse_method() +3555 items = [parse_result] if parse_result is not None else [] +3556 +3557 while self._match(sep): +3558 if parse_result and self._prev_comments: +3559 parse_result.comments = self._prev_comments +3560 +3561 parse_result = parse_method() +3562 if parse_result is not None: +3563 items.append(parse_result) +3564 +3565 return items +3566 +3567 def _parse_tokens( +3568 self, parse_method: t.Callable, expressions: t.Dict +3569 ) -> t.Optional[exp.Expression]: +3570 this = parse_method() +3571 +3572 while self._match_set(expressions): +3573 this = self.expression( +3574 expressions[self._prev.token_type], +3575 this=this, +3576 comments=self._prev_comments, +3577 expression=parse_method(), +3578 ) +3579 +3580 return this +3581 +3582 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: +3583 return self._parse_wrapped_csv(self._parse_id_var) +3584 +3585 def _parse_wrapped_csv( +3586 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA +3587 ) -> t.List[t.Optional[exp.Expression]]: +3588 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) +3589 +3590 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: +3591 self._match_l_paren() +3592 parse_result = parse_method() +3593 self._match_r_paren() +3594 return parse_result +3595 +3596 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: +3597 return self._parse_select() or self._parse_expression() 3598 -3599 if self._match(TokenType.AND): -3600 chain = not self._match_text_seq("NO") -3601 self._match_text_seq("CHAIN") -3602 -3603 if is_rollback: -3604 return self.expression(exp.Rollback, savepoint=savepoint) -3605 return self.expression(exp.Commit, chain=chain) -3606 -3607 def _parse_add_column(self) -> t.Optional[exp.Expression]: -3608 if not self._match_text_seq("ADD"): -3609 return None +3599 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: +3600 return self._parse_set_operations( +3601 self._parse_select(nested=True, parse_subquery_alias=False) +3602 ) +3603 +3604 def _parse_transaction(self) -> exp.Expression: +3605 this = None +3606 if self._match_texts(self.TRANSACTION_KIND): +3607 this = self._prev.text +3608 +3609 self._match_texts({"TRANSACTION", "WORK"}) 3610 -3611 self._match(TokenType.COLUMN) -3612 exists_column = self._parse_exists(not_=True) -3613 expression = self._parse_column_def(self._parse_field(any_token=True)) -3614 -3615 if expression: -3616 expression.set("exists", exists_column) -3617 -3618 return expression -3619 -3620 def _parse_drop_column(self) -> t.Optional[exp.Expression]: -3621 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") -3622 -3623 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html -3624 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: -3625 return self.expression( -3626 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists -3627 ) +3611 modes = [] +3612 while True: +3613 mode = [] +3614 while self._match(TokenType.VAR): +3615 mode.append(self._prev.text) +3616 +3617 if mode: +3618 modes.append(" ".join(mode)) +3619 if not self._match(TokenType.COMMA): +3620 break +3621 +3622 return self.expression(exp.Transaction, this=this, modes=modes) +3623 +3624 def _parse_commit_or_rollback(self) -> exp.Expression: +3625 chain = None +3626 savepoint = None +3627 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3628 -3629 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: -3630 this = None -3631 kind = self._prev.token_type -3632 -3633 if kind == TokenType.CONSTRAINT: -3634 this = self._parse_id_var() -3635 -3636 if self._match_text_seq("CHECK"): -3637 expression = self._parse_wrapped(self._parse_conjunction) -3638 enforced = self._match_text_seq("ENFORCED") -3639 -3640 return self.expression( -3641 exp.AddConstraint, this=this, expression=expression, enforced=enforced -3642 ) -3643 -3644 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): -3645 expression = self._parse_foreign_key() -3646 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): -3647 expression = self._parse_primary_key() -3648 -3649 return self.expression(exp.AddConstraint, this=this, expression=expression) +3629 self._match_texts({"TRANSACTION", "WORK"}) +3630 +3631 if self._match_text_seq("TO"): +3632 self._match_text_seq("SAVEPOINT") +3633 savepoint = self._parse_id_var() +3634 +3635 if self._match(TokenType.AND): +3636 chain = not self._match_text_seq("NO") +3637 self._match_text_seq("CHAIN") +3638 +3639 if is_rollback: +3640 return self.expression(exp.Rollback, savepoint=savepoint) +3641 return self.expression(exp.Commit, chain=chain) +3642 +3643 def _parse_add_column(self) -> t.Optional[exp.Expression]: +3644 if not self._match_text_seq("ADD"): +3645 return None +3646 +3647 self._match(TokenType.COLUMN) +3648 exists_column = self._parse_exists(not_=True) +3649 expression = self._parse_column_def(self._parse_field(any_token=True)) 3650 -3651 def _parse_alter(self) -> t.Optional[exp.Expression]: -3652 if not self._match(TokenType.TABLE): -3653 return self._parse_as_command(self._prev) -3654 -3655 exists = self._parse_exists() -3656 this = self._parse_table(schema=True) -3657 -3658 actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None -3659 -3660 index = self._index -3661 if self._match(TokenType.DELETE): -3662 actions = [self.expression(exp.Delete, where=self._parse_where())] -3663 elif self._match_text_seq("ADD"): -3664 if self._match_set(self.ADD_CONSTRAINT_TOKENS): -3665 actions = self._parse_csv(self._parse_add_constraint) -3666 else: -3667 self._retreat(index) -3668 actions = self._parse_csv(self._parse_add_column) -3669 elif self._match_text_seq("DROP"): -3670 partition_exists = self._parse_exists() +3651 if expression: +3652 expression.set("exists", exists_column) +3653 +3654 return expression +3655 +3656 def _parse_drop_column(self) -> t.Optional[exp.Expression]: +3657 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") +3658 +3659 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html +3660 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: +3661 return self.expression( +3662 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists +3663 ) +3664 +3665 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: +3666 this = None +3667 kind = self._prev.token_type +3668 +3669 if kind == TokenType.CONSTRAINT: +3670 this = self._parse_id_var() 3671 -3672 if self._match(TokenType.PARTITION, advance=False): -3673 actions = self._parse_csv( -3674 lambda: self._parse_drop_partition(exists=partition_exists) -3675 ) -3676 else: -3677 self._retreat(index) -3678 actions = self._parse_csv(self._parse_drop_column) -3679 elif self._match_text_seq("RENAME", "TO"): -3680 actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) -3681 elif self._match_text_seq("ALTER"): -3682 self._match(TokenType.COLUMN) -3683 column = self._parse_field(any_token=True) +3672 if self._match_text_seq("CHECK"): +3673 expression = self._parse_wrapped(self._parse_conjunction) +3674 enforced = self._match_text_seq("ENFORCED") +3675 +3676 return self.expression( +3677 exp.AddConstraint, this=this, expression=expression, enforced=enforced +3678 ) +3679 +3680 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): +3681 expression = self._parse_foreign_key() +3682 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): +3683 expression = self._parse_primary_key() 3684 -3685 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): -3686 actions = self.expression(exp.AlterColumn, this=column, drop=True) -3687 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): -3688 actions = self.expression( -3689 exp.AlterColumn, this=column, default=self._parse_conjunction() -3690 ) -3691 else: -3692 self._match_text_seq("SET", "DATA") -3693 actions = self.expression( -3694 exp.AlterColumn, -3695 this=column, -3696 dtype=self._match_text_seq("TYPE") and self._parse_types(), -3697 collate=self._match(TokenType.COLLATE) and self._parse_term(), -3698 using=self._match(TokenType.USING) and self._parse_conjunction(), -3699 ) -3700 -3701 actions = ensure_list(actions) -3702 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) -3703 -3704 def _parse_show(self) -> t.Optional[exp.Expression]: -3705 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore -3706 if parser: -3707 return parser(self) -3708 self._advance() -3709 return self.expression(exp.Show, this=self._prev.text.upper()) -3710 -3711 def _default_parse_set_item(self) -> exp.Expression: -3712 return self.expression( -3713 exp.SetItem, -3714 this=self._parse_statement(), -3715 ) +3685 return self.expression(exp.AddConstraint, this=this, expression=expression) +3686 +3687 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: +3688 index = self._index - 1 +3689 +3690 if self._match_set(self.ADD_CONSTRAINT_TOKENS): +3691 return self._parse_csv(self._parse_add_constraint) +3692 +3693 self._retreat(index) +3694 return self._parse_csv(self._parse_add_column) +3695 +3696 def _parse_alter_table_alter(self) -> exp.Expression: +3697 self._match(TokenType.COLUMN) +3698 column = self._parse_field(any_token=True) +3699 +3700 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): +3701 return self.expression(exp.AlterColumn, this=column, drop=True) +3702 if self._match_pair(TokenType.SET, TokenType.DEFAULT): +3703 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) +3704 +3705 self._match_text_seq("SET", "DATA") +3706 return self.expression( +3707 exp.AlterColumn, +3708 this=column, +3709 dtype=self._match_text_seq("TYPE") and self._parse_types(), +3710 collate=self._match(TokenType.COLLATE) and self._parse_term(), +3711 using=self._match(TokenType.USING) and self._parse_conjunction(), +3712 ) +3713 +3714 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: +3715 index = self._index - 1 3716 -3717 def _parse_set_item(self) -> t.Optional[exp.Expression]: -3718 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore -3719 return parser(self) if parser else self._default_parse_set_item() +3717 partition_exists = self._parse_exists() +3718 if self._match(TokenType.PARTITION, advance=False): +3719 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3720 -3721 def _parse_merge(self) -> exp.Expression: -3722 self._match(TokenType.INTO) -3723 target = self._parse_table() -3724 -3725 self._match(TokenType.USING) -3726 using = self._parse_table() +3721 self._retreat(index) +3722 return self._parse_csv(self._parse_drop_column) +3723 +3724 def _parse_alter_table_rename(self) -> exp.Expression: +3725 self._match_text_seq("TO") +3726 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3727 -3728 self._match(TokenType.ON) -3729 on = self._parse_conjunction() -3730 -3731 whens = [] -3732 while self._match(TokenType.WHEN): -3733 this = self._parse_conjunction() -3734 self._match(TokenType.THEN) -3735 -3736 if self._match(TokenType.INSERT): -3737 _this = self._parse_star() -3738 if _this: -3739 then = self.expression(exp.Insert, this=_this) -3740 else: -3741 then = self.expression( -3742 exp.Insert, -3743 this=self._parse_value(), -3744 expression=self._match(TokenType.VALUES) and self._parse_value(), -3745 ) -3746 elif self._match(TokenType.UPDATE): -3747 expressions = self._parse_star() -3748 if expressions: -3749 then = self.expression(exp.Update, expressions=expressions) -3750 else: -3751 then = self.expression( -3752 exp.Update, -3753 expressions=self._match(TokenType.SET) -3754 and self._parse_csv(self._parse_equality), -3755 ) -3756 elif self._match(TokenType.DELETE): -3757 then = self.expression(exp.Var, this=self._prev.text) -3758 -3759 whens.append(self.expression(exp.When, this=this, then=then)) -3760 -3761 return self.expression( -3762 exp.Merge, -3763 this=target, -3764 using=using, -3765 on=on, -3766 expressions=whens, -3767 ) -3768 -3769 def _parse_set(self) -> exp.Expression: -3770 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) -3771 -3772 def _parse_as_command(self, start: Token) -> exp.Command: -3773 while self._curr: -3774 self._advance() -3775 return exp.Command(this=self._find_sql(start, self._prev)) -3776 -3777 def _find_parser( -3778 self, parsers: t.Dict[str, t.Callable], trie: t.Dict -3779 ) -> t.Optional[t.Callable]: -3780 index = self._index -3781 this = [] -3782 while True: -3783 # The current token might be multiple words -3784 curr = self._curr.text.upper() -3785 key = curr.split(" ") -3786 this.append(curr) -3787 self._advance() -3788 result, trie = in_trie(trie, key) -3789 if result == 0: -3790 break -3791 if result == 2: -3792 subparser = parsers[" ".join(this)] -3793 return subparser -3794 self._retreat(index) -3795 return None -3796 -3797 def _match(self, token_type, advance=True): -3798 if not self._curr: -3799 return None -3800 -3801 if self._curr.token_type == token_type: -3802 if advance: -3803 self._advance() -3804 return True -3805 -3806 return None +3728 def _parse_alter(self) -> t.Optional[exp.Expression]: +3729 if not self._match(TokenType.TABLE): +3730 return self._parse_as_command(self._prev) +3731 +3732 exists = self._parse_exists() +3733 this = self._parse_table(schema=True) +3734 +3735 if not self._curr: +3736 return None +3737 +3738 parser = self.ALTER_PARSERS.get(self._curr.text.upper()) +3739 actions = ensure_list(self._advance() or parser(self)) if parser else [] # type: ignore +3740 +3741 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) +3742 +3743 def _parse_show(self) -> t.Optional[exp.Expression]: +3744 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore +3745 if parser: +3746 return parser(self) +3747 self._advance() +3748 return self.expression(exp.Show, this=self._prev.text.upper()) +3749 +3750 def _default_parse_set_item(self) -> exp.Expression: +3751 return self.expression( +3752 exp.SetItem, +3753 this=self._parse_statement(), +3754 ) +3755 +3756 def _parse_set_item(self) -> t.Optional[exp.Expression]: +3757 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore +3758 return parser(self) if parser else self._default_parse_set_item() +3759 +3760 def _parse_merge(self) -> exp.Expression: +3761 self._match(TokenType.INTO) +3762 target = self._parse_table() +3763 +3764 self._match(TokenType.USING) +3765 using = self._parse_table() +3766 +3767 self._match(TokenType.ON) +3768 on = self._parse_conjunction() +3769 +3770 whens = [] +3771 while self._match(TokenType.WHEN): +3772 this = self._parse_conjunction() +3773 self._match(TokenType.THEN) +3774 +3775 if self._match(TokenType.INSERT): +3776 _this = self._parse_star() +3777 if _this: +3778 then = self.expression(exp.Insert, this=_this) +3779 else: +3780 then = self.expression( +3781 exp.Insert, +3782 this=self._parse_value(), +3783 expression=self._match(TokenType.VALUES) and self._parse_value(), +3784 ) +3785 elif self._match(TokenType.UPDATE): +3786 expressions = self._parse_star() +3787 if expressions: +3788 then = self.expression(exp.Update, expressions=expressions) +3789 else: +3790 then = self.expression( +3791 exp.Update, +3792 expressions=self._match(TokenType.SET) +3793 and self._parse_csv(self._parse_equality), +3794 ) +3795 elif self._match(TokenType.DELETE): +3796 then = self.expression(exp.Var, this=self._prev.text) +3797 +3798 whens.append(self.expression(exp.When, this=this, then=then)) +3799 +3800 return self.expression( +3801 exp.Merge, +3802 this=target, +3803 using=using, +3804 on=on, +3805 expressions=whens, +3806 ) 3807 -3808 def _match_set(self, types, advance=True): -3809 if not self._curr: -3810 return None -3811 -3812 if self._curr.token_type in types: -3813 if advance: -3814 self._advance() -3815 return True -3816 -3817 return None -3818 -3819 def _match_pair(self, token_type_a, token_type_b, advance=True): -3820 if not self._curr or not self._next: -3821 return None -3822 -3823 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: -3824 if advance: -3825 self._advance(2) -3826 return True -3827 -3828 return None -3829 -3830 def _match_l_paren(self, expression=None): -3831 if not self._match(TokenType.L_PAREN): -3832 self.raise_error("Expecting (") -3833 if expression and self._prev_comments: -3834 expression.comments = self._prev_comments -3835 -3836 def _match_r_paren(self, expression=None): -3837 if not self._match(TokenType.R_PAREN): -3838 self.raise_error("Expecting )") -3839 if expression and self._prev_comments: -3840 expression.comments = self._prev_comments +3808 def _parse_set(self) -> exp.Expression: +3809 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) +3810 +3811 def _parse_as_command(self, start: Token) -> exp.Command: +3812 while self._curr: +3813 self._advance() +3814 text = self._find_sql(start, self._prev) +3815 size = len(start.text) +3816 return exp.Command(this=text[:size], expression=text[size:]) +3817 +3818 def _find_parser( +3819 self, parsers: t.Dict[str, t.Callable], trie: t.Dict +3820 ) -> t.Optional[t.Callable]: +3821 index = self._index +3822 this = [] +3823 while True: +3824 # The current token might be multiple words +3825 curr = self._curr.text.upper() +3826 key = curr.split(" ") +3827 this.append(curr) +3828 self._advance() +3829 result, trie = in_trie(trie, key) +3830 if result == 0: +3831 break +3832 if result == 2: +3833 subparser = parsers[" ".join(this)] +3834 return subparser +3835 self._retreat(index) +3836 return None +3837 +3838 def _match(self, token_type, advance=True): +3839 if not self._curr: +3840 return None 3841 -3842 def _match_texts(self, texts, advance=True): -3843 if self._curr and self._curr.text.upper() in texts: -3844 if advance: -3845 self._advance() -3846 return True -3847 return False +3842 if self._curr.token_type == token_type: +3843 if advance: +3844 self._advance() +3845 return True +3846 +3847 return None 3848 -3849 def _match_text_seq(self, *texts, advance=True): -3850 index = self._index -3851 for text in texts: -3852 if self._curr and self._curr.text.upper() == text: -3853 self._advance() -3854 else: -3855 self._retreat(index) -3856 return False +3849 def _match_set(self, types, advance=True): +3850 if not self._curr: +3851 return None +3852 +3853 if self._curr.token_type in types: +3854 if advance: +3855 self._advance() +3856 return True 3857 -3858 if not advance: -3859 self._retreat(index) -3860 -3861 return True -3862 -3863 def _replace_columns_with_dots(self, this): -3864 if isinstance(this, exp.Dot): -3865 exp.replace_children(this, self._replace_columns_with_dots) -3866 elif isinstance(this, exp.Column): -3867 exp.replace_children(this, self._replace_columns_with_dots) -3868 table = this.args.get("table") -3869 this = ( -3870 self.expression(exp.Dot, this=table, expression=this.this) -3871 if table -3872 else self.expression(exp.Var, this=this.name) -3873 ) -3874 elif isinstance(this, exp.Identifier): -3875 this = self.expression(exp.Var, this=this.name) -3876 return this -3877 -3878 def _replace_lambda(self, node, lambda_variables): -3879 if isinstance(node, exp.Column): -3880 if node.name in lambda_variables: -3881 return node.this -3882 return node +3858 return None +3859 +3860 def _match_pair(self, token_type_a, token_type_b, advance=True): +3861 if not self._curr or not self._next: +3862 return None +3863 +3864 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: +3865 if advance: +3866 self._advance(2) +3867 return True +3868 +3869 return None +3870 +3871 def _match_l_paren(self, expression=None): +3872 if not self._match(TokenType.L_PAREN): +3873 self.raise_error("Expecting (") +3874 if expression and self._prev_comments: +3875 expression.comments = self._prev_comments +3876 +3877 def _match_r_paren(self, expression=None): +3878 if not self._match(TokenType.R_PAREN): +3879 self.raise_error("Expecting )") +3880 if expression and self._prev_comments: +3881 expression.comments = self._prev_comments +3882 +3883 def _match_texts(self, texts, advance=True): +3884 if self._curr and self._curr.text.upper() in texts: +3885 if advance: +3886 self._advance() +3887 return True +3888 return False +3889 +3890 def _match_text_seq(self, *texts, advance=True): +3891 index = self._index +3892 for text in texts: +3893 if self._curr and self._curr.text.upper() == text: +3894 self._advance() +3895 else: +3896 self._retreat(index) +3897 return False +3898 +3899 if not advance: +3900 self._retreat(index) +3901 +3902 return True +3903 +3904 def _replace_columns_with_dots(self, this): +3905 if isinstance(this, exp.Dot): +3906 exp.replace_children(this, self._replace_columns_with_dots) +3907 elif isinstance(this, exp.Column): +3908 exp.replace_children(this, self._replace_columns_with_dots) +3909 table = this.args.get("table") +3910 this = ( +3911 self.expression(exp.Dot, this=table, expression=this.this) +3912 if table +3913 else self.expression(exp.Var, this=this.name) +3914 ) +3915 elif isinstance(this, exp.Identifier): +3916 this = self.expression(exp.Var, this=this.name) +3917 return this +3918 +3919 def _replace_lambda(self, node, lambda_variables): +3920 if isinstance(node, exp.Column): +3921 if node.name in lambda_variables: +3922 return node.this +3923 return node @@ -4128,3729 +4169,3770 @@ 158 159 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 160 - 161 ID_VAR_TOKENS = { - 162 TokenType.VAR, - 163 TokenType.ANTI, - 164 TokenType.APPLY, - 165 TokenType.AUTO_INCREMENT, - 166 TokenType.BEGIN, - 167 TokenType.BOTH, - 168 TokenType.BUCKET, - 169 TokenType.CACHE, - 170 TokenType.CASCADE, - 171 TokenType.COLLATE, - 172 TokenType.COLUMN, - 173 TokenType.COMMAND, - 174 TokenType.COMMIT, - 175 TokenType.COMPOUND, - 176 TokenType.CONSTRAINT, - 177 TokenType.CURRENT_TIME, - 178 TokenType.DEFAULT, - 179 TokenType.DELETE, - 180 TokenType.DESCRIBE, - 181 TokenType.DIV, - 182 TokenType.END, - 183 TokenType.EXECUTE, - 184 TokenType.ESCAPE, - 185 TokenType.FALSE, - 186 TokenType.FIRST, - 187 TokenType.FILTER, - 188 TokenType.FOLLOWING, - 189 TokenType.FORMAT, - 190 TokenType.FUNCTION, - 191 TokenType.IF, - 192 TokenType.INDEX, - 193 TokenType.ISNULL, - 194 TokenType.INTERVAL, - 195 TokenType.LAZY, - 196 TokenType.LEADING, - 197 TokenType.LEFT, - 198 TokenType.LOCAL, - 199 TokenType.MATERIALIZED, - 200 TokenType.MERGE, - 201 TokenType.NATURAL, - 202 TokenType.NEXT, - 203 TokenType.OFFSET, - 204 TokenType.ONLY, - 205 TokenType.OPTIONS, - 206 TokenType.ORDINALITY, - 207 TokenType.PERCENT, - 208 TokenType.PIVOT, - 209 TokenType.PRECEDING, - 210 TokenType.RANGE, - 211 TokenType.REFERENCES, - 212 TokenType.RIGHT, - 213 TokenType.ROW, - 214 TokenType.ROWS, - 215 TokenType.SCHEMA, - 216 TokenType.SEED, - 217 TokenType.SEMI, - 218 TokenType.SET, - 219 TokenType.SHOW, - 220 TokenType.SORTKEY, - 221 TokenType.TABLE, - 222 TokenType.TEMPORARY, - 223 TokenType.TOP, - 224 TokenType.TRAILING, - 225 TokenType.TRUE, - 226 TokenType.UNBOUNDED, - 227 TokenType.UNIQUE, - 228 TokenType.UNLOGGED, - 229 TokenType.UNPIVOT, - 230 TokenType.PROCEDURE, - 231 TokenType.VIEW, - 232 TokenType.VOLATILE, - 233 TokenType.WINDOW, - 234 *SUBQUERY_PREDICATES, - 235 *TYPE_TOKENS, - 236 *NO_PAREN_FUNCTIONS, - 237 } - 238 - 239 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { - 240 TokenType.APPLY, - 241 TokenType.LEFT, - 242 TokenType.NATURAL, - 243 TokenType.OFFSET, - 244 TokenType.RIGHT, - 245 TokenType.WINDOW, - 246 } - 247 - 248 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} - 249 - 250 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} - 251 - 252 FUNC_TOKENS = { - 253 TokenType.COMMAND, - 254 TokenType.CURRENT_DATE, - 255 TokenType.CURRENT_DATETIME, - 256 TokenType.CURRENT_TIMESTAMP, - 257 TokenType.CURRENT_TIME, - 258 TokenType.FILTER, - 259 TokenType.FIRST, - 260 TokenType.FORMAT, - 261 TokenType.IDENTIFIER, - 262 TokenType.INDEX, - 263 TokenType.ISNULL, - 264 TokenType.ILIKE, - 265 TokenType.LIKE, - 266 TokenType.MERGE, - 267 TokenType.OFFSET, - 268 TokenType.PRIMARY_KEY, - 269 TokenType.REPLACE, - 270 TokenType.ROW, - 271 TokenType.UNNEST, - 272 TokenType.VAR, - 273 TokenType.LEFT, - 274 TokenType.RIGHT, - 275 TokenType.DATE, - 276 TokenType.DATETIME, - 277 TokenType.TABLE, - 278 TokenType.TIMESTAMP, - 279 TokenType.TIMESTAMPTZ, - 280 TokenType.WINDOW, - 281 *TYPE_TOKENS, - 282 *SUBQUERY_PREDICATES, - 283 } - 284 - 285 CONJUNCTION = { - 286 TokenType.AND: exp.And, - 287 TokenType.OR: exp.Or, - 288 } - 289 - 290 EQUALITY = { - 291 TokenType.EQ: exp.EQ, - 292 TokenType.NEQ: exp.NEQ, - 293 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, - 294 } - 295 - 296 COMPARISON = { - 297 TokenType.GT: exp.GT, - 298 TokenType.GTE: exp.GTE, - 299 TokenType.LT: exp.LT, - 300 TokenType.LTE: exp.LTE, - 301 } - 302 - 303 BITWISE = { - 304 TokenType.AMP: exp.BitwiseAnd, - 305 TokenType.CARET: exp.BitwiseXor, - 306 TokenType.PIPE: exp.BitwiseOr, - 307 TokenType.DPIPE: exp.DPipe, - 308 } - 309 - 310 TERM = { - 311 TokenType.DASH: exp.Sub, - 312 TokenType.PLUS: exp.Add, - 313 TokenType.MOD: exp.Mod, - 314 TokenType.COLLATE: exp.Collate, - 315 } - 316 - 317 FACTOR = { - 318 TokenType.DIV: exp.IntDiv, - 319 TokenType.LR_ARROW: exp.Distance, - 320 TokenType.SLASH: exp.Div, - 321 TokenType.STAR: exp.Mul, - 322 } - 323 - 324 TIMESTAMPS = { - 325 TokenType.TIME, - 326 TokenType.TIMESTAMP, - 327 TokenType.TIMESTAMPTZ, - 328 TokenType.TIMESTAMPLTZ, - 329 } - 330 - 331 SET_OPERATIONS = { - 332 TokenType.UNION, - 333 TokenType.INTERSECT, - 334 TokenType.EXCEPT, - 335 } - 336 - 337 JOIN_SIDES = { - 338 TokenType.LEFT, - 339 TokenType.RIGHT, - 340 TokenType.FULL, - 341 } - 342 - 343 JOIN_KINDS = { - 344 TokenType.INNER, - 345 TokenType.OUTER, - 346 TokenType.CROSS, - 347 TokenType.SEMI, - 348 TokenType.ANTI, - 349 } - 350 - 351 LAMBDAS = { - 352 TokenType.ARROW: lambda self, expressions: self.expression( - 353 exp.Lambda, - 354 this=self._parse_conjunction().transform( - 355 self._replace_lambda, {node.name for node in expressions} - 356 ), - 357 expressions=expressions, - 358 ), - 359 TokenType.FARROW: lambda self, expressions: self.expression( - 360 exp.Kwarg, - 361 this=exp.Var(this=expressions[0].name), - 362 expression=self._parse_conjunction(), - 363 ), - 364 } - 365 - 366 COLUMN_OPERATORS = { - 367 TokenType.DOT: None, - 368 TokenType.DCOLON: lambda self, this, to: self.expression( - 369 exp.Cast, - 370 this=this, - 371 to=to, - 372 ), - 373 TokenType.ARROW: lambda self, this, path: self.expression( - 374 exp.JSONExtract, - 375 this=this, - 376 expression=path, - 377 ), - 378 TokenType.DARROW: lambda self, this, path: self.expression( - 379 exp.JSONExtractScalar, + 161 DB_CREATABLES = { + 162 TokenType.DATABASE, + 163 TokenType.SCHEMA, + 164 TokenType.TABLE, + 165 TokenType.VIEW, + 166 } + 167 + 168 CREATABLES = { + 169 TokenType.COLUMN, + 170 TokenType.FUNCTION, + 171 TokenType.INDEX, + 172 TokenType.PROCEDURE, + 173 *DB_CREATABLES, + 174 } + 175 + 176 ID_VAR_TOKENS = { + 177 TokenType.VAR, + 178 TokenType.ANTI, + 179 TokenType.APPLY, + 180 TokenType.AUTO_INCREMENT, + 181 TokenType.BEGIN, + 182 TokenType.BOTH, + 183 TokenType.BUCKET, + 184 TokenType.CACHE, + 185 TokenType.CASCADE, + 186 TokenType.COLLATE, + 187 TokenType.COMMAND, + 188 TokenType.COMMENT, + 189 TokenType.COMMIT, + 190 TokenType.COMPOUND, + 191 TokenType.CONSTRAINT, + 192 TokenType.CURRENT_TIME, + 193 TokenType.DEFAULT, + 194 TokenType.DELETE, + 195 TokenType.DESCRIBE, + 196 TokenType.DIV, + 197 TokenType.END, + 198 TokenType.EXECUTE, + 199 TokenType.ESCAPE, + 200 TokenType.FALSE, + 201 TokenType.FIRST, + 202 TokenType.FILTER, + 203 TokenType.FOLLOWING, + 204 TokenType.FORMAT, + 205 TokenType.IF, + 206 TokenType.ISNULL, + 207 TokenType.INTERVAL, + 208 TokenType.LAZY, + 209 TokenType.LEADING, + 210 TokenType.LEFT, + 211 TokenType.LOCAL, + 212 TokenType.MATERIALIZED, + 213 TokenType.MERGE, + 214 TokenType.NATURAL, + 215 TokenType.NEXT, + 216 TokenType.OFFSET, + 217 TokenType.ONLY, + 218 TokenType.OPTIONS, + 219 TokenType.ORDINALITY, + 220 TokenType.PERCENT, + 221 TokenType.PIVOT, + 222 TokenType.PRECEDING, + 223 TokenType.RANGE, + 224 TokenType.REFERENCES, + 225 TokenType.RIGHT, + 226 TokenType.ROW, + 227 TokenType.ROWS, + 228 TokenType.SEED, + 229 TokenType.SEMI, + 230 TokenType.SET, + 231 TokenType.SHOW, + 232 TokenType.SORTKEY, + 233 TokenType.TEMPORARY, + 234 TokenType.TOP, + 235 TokenType.TRAILING, + 236 TokenType.TRUE, + 237 TokenType.UNBOUNDED, + 238 TokenType.UNIQUE, + 239 TokenType.UNLOGGED, + 240 TokenType.UNPIVOT, + 241 TokenType.VOLATILE, + 242 TokenType.WINDOW, + 243 *CREATABLES, + 244 *SUBQUERY_PREDICATES, + 245 *TYPE_TOKENS, + 246 *NO_PAREN_FUNCTIONS, + 247 } + 248 + 249 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { + 250 TokenType.APPLY, + 251 TokenType.LEFT, + 252 TokenType.NATURAL, + 253 TokenType.OFFSET, + 254 TokenType.RIGHT, + 255 TokenType.WINDOW, + 256 } + 257 + 258 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} + 259 + 260 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} + 261 + 262 FUNC_TOKENS = { + 263 TokenType.COMMAND, + 264 TokenType.CURRENT_DATE, + 265 TokenType.CURRENT_DATETIME, + 266 TokenType.CURRENT_TIMESTAMP, + 267 TokenType.CURRENT_TIME, + 268 TokenType.FILTER, + 269 TokenType.FIRST, + 270 TokenType.FORMAT, + 271 TokenType.IDENTIFIER, + 272 TokenType.INDEX, + 273 TokenType.ISNULL, + 274 TokenType.ILIKE, + 275 TokenType.LIKE, + 276 TokenType.MERGE, + 277 TokenType.OFFSET, + 278 TokenType.PRIMARY_KEY, + 279 TokenType.REPLACE, + 280 TokenType.ROW, + 281 TokenType.UNNEST, + 282 TokenType.VAR, + 283 TokenType.LEFT, + 284 TokenType.RIGHT, + 285 TokenType.DATE, + 286 TokenType.DATETIME, + 287 TokenType.TABLE, + 288 TokenType.TIMESTAMP, + 289 TokenType.TIMESTAMPTZ, + 290 TokenType.WINDOW, + 291 *TYPE_TOKENS, + 292 *SUBQUERY_PREDICATES, + 293 } + 294 + 295 CONJUNCTION = { + 296 TokenType.AND: exp.And, + 297 TokenType.OR: exp.Or, + 298 } + 299 + 300 EQUALITY = { + 301 TokenType.EQ: exp.EQ, + 302 TokenType.NEQ: exp.NEQ, + 303 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, + 304 } + 305 + 306 COMPARISON = { + 307 TokenType.GT: exp.GT, + 308 TokenType.GTE: exp.GTE, + 309 TokenType.LT: exp.LT, + 310 TokenType.LTE: exp.LTE, + 311 } + 312 + 313 BITWISE = { + 314 TokenType.AMP: exp.BitwiseAnd, + 315 TokenType.CARET: exp.BitwiseXor, + 316 TokenType.PIPE: exp.BitwiseOr, + 317 TokenType.DPIPE: exp.DPipe, + 318 } + 319 + 320 TERM = { + 321 TokenType.DASH: exp.Sub, + 322 TokenType.PLUS: exp.Add, + 323 TokenType.MOD: exp.Mod, + 324 TokenType.COLLATE: exp.Collate, + 325 } + 326 + 327 FACTOR = { + 328 TokenType.DIV: exp.IntDiv, + 329 TokenType.LR_ARROW: exp.Distance, + 330 TokenType.SLASH: exp.Div, + 331 TokenType.STAR: exp.Mul, + 332 } + 333 + 334 TIMESTAMPS = { + 335 TokenType.TIME, + 336 TokenType.TIMESTAMP, + 337 TokenType.TIMESTAMPTZ, + 338 TokenType.TIMESTAMPLTZ, + 339 } + 340 + 341 SET_OPERATIONS = { + 342 TokenType.UNION, + 343 TokenType.INTERSECT, + 344 TokenType.EXCEPT, + 345 } + 346 + 347 JOIN_SIDES = { + 348 TokenType.LEFT, + 349 TokenType.RIGHT, + 350 TokenType.FULL, + 351 } + 352 + 353 JOIN_KINDS = { + 354 TokenType.INNER, + 355 TokenType.OUTER, + 356 TokenType.CROSS, + 357 TokenType.SEMI, + 358 TokenType.ANTI, + 359 } + 360 + 361 LAMBDAS = { + 362 TokenType.ARROW: lambda self, expressions: self.expression( + 363 exp.Lambda, + 364 this=self._parse_conjunction().transform( + 365 self._replace_lambda, {node.name for node in expressions} + 366 ), + 367 expressions=expressions, + 368 ), + 369 TokenType.FARROW: lambda self, expressions: self.expression( + 370 exp.Kwarg, + 371 this=exp.Var(this=expressions[0].name), + 372 expression=self._parse_conjunction(), + 373 ), + 374 } + 375 + 376 COLUMN_OPERATORS = { + 377 TokenType.DOT: None, + 378 TokenType.DCOLON: lambda self, this, to: self.expression( + 379 exp.Cast, 380 this=this, - 381 expression=path, + 381 to=to, 382 ), - 383 TokenType.HASH_ARROW: lambda self, this, path: self.expression( - 384 exp.JSONBExtract, + 383 TokenType.ARROW: lambda self, this, path: self.expression( + 384 exp.JSONExtract, 385 this=this, 386 expression=path, 387 ), - 388 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( - 389 exp.JSONBExtractScalar, + 388 TokenType.DARROW: lambda self, this, path: self.expression( + 389 exp.JSONExtractScalar, 390 this=this, 391 expression=path, 392 ), - 393 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( - 394 exp.JSONBContains, + 393 TokenType.HASH_ARROW: lambda self, this, path: self.expression( + 394 exp.JSONBExtract, 395 this=this, - 396 expression=key, + 396 expression=path, 397 ), - 398 } - 399 - 400 EXPRESSION_PARSERS = { - 401 exp.Column: lambda self: self._parse_column(), - 402 exp.DataType: lambda self: self._parse_types(), - 403 exp.From: lambda self: self._parse_from(), - 404 exp.Group: lambda self: self._parse_group(), - 405 exp.Identifier: lambda self: self._parse_id_var(), - 406 exp.Lateral: lambda self: self._parse_lateral(), - 407 exp.Join: lambda self: self._parse_join(), - 408 exp.Order: lambda self: self._parse_order(), - 409 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), - 410 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), - 411 exp.Lambda: lambda self: self._parse_lambda(), - 412 exp.Limit: lambda self: self._parse_limit(), - 413 exp.Offset: lambda self: self._parse_offset(), - 414 exp.TableAlias: lambda self: self._parse_table_alias(), - 415 exp.Table: lambda self: self._parse_table(), - 416 exp.Condition: lambda self: self._parse_conjunction(), - 417 exp.Expression: lambda self: self._parse_statement(), - 418 exp.Properties: lambda self: self._parse_properties(), - 419 exp.Where: lambda self: self._parse_where(), - 420 exp.Ordered: lambda self: self._parse_ordered(), - 421 exp.Having: lambda self: self._parse_having(), - 422 exp.With: lambda self: self._parse_with(), - 423 exp.Window: lambda self: self._parse_named_window(), - 424 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), - 425 } - 426 - 427 STATEMENT_PARSERS = { - 428 TokenType.ALTER: lambda self: self._parse_alter(), - 429 TokenType.BEGIN: lambda self: self._parse_transaction(), - 430 TokenType.CACHE: lambda self: self._parse_cache(), - 431 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), - 432 TokenType.CREATE: lambda self: self._parse_create(), - 433 TokenType.DELETE: lambda self: self._parse_delete(), - 434 TokenType.DESC: lambda self: self._parse_describe(), - 435 TokenType.DESCRIBE: lambda self: self._parse_describe(), - 436 TokenType.DROP: lambda self: self._parse_drop(), - 437 TokenType.END: lambda self: self._parse_commit_or_rollback(), - 438 TokenType.INSERT: lambda self: self._parse_insert(), - 439 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), - 440 TokenType.MERGE: lambda self: self._parse_merge(), - 441 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), - 442 TokenType.UNCACHE: lambda self: self._parse_uncache(), - 443 TokenType.UPDATE: lambda self: self._parse_update(), - 444 TokenType.USE: lambda self: self.expression( - 445 exp.Use, - 446 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) - 447 and exp.Var(this=self._prev.text), - 448 this=self._parse_table(schema=False), - 449 ), - 450 } - 451 - 452 UNARY_PARSERS = { - 453 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op - 454 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), - 455 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), - 456 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), - 457 } - 458 - 459 PRIMARY_PARSERS = { - 460 TokenType.STRING: lambda self, token: self.expression( - 461 exp.Literal, this=token.text, is_string=True - 462 ), - 463 TokenType.NUMBER: lambda self, token: self.expression( - 464 exp.Literal, this=token.text, is_string=False - 465 ), - 466 TokenType.STAR: lambda self, _: self.expression( - 467 exp.Star, - 468 **{"except": self._parse_except(), "replace": self._parse_replace()}, - 469 ), - 470 TokenType.NULL: lambda self, _: self.expression(exp.Null), - 471 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), - 472 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), - 473 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), - 474 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), - 475 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), - 476 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), - 477 TokenType.NATIONAL: lambda self, token: self._parse_national(token), - 478 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), - 479 } - 480 - 481 PLACEHOLDER_PARSERS = { - 482 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), - 483 TokenType.PARAMETER: lambda self: self._parse_parameter(), - 484 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) - 485 if self._match_set((TokenType.NUMBER, TokenType.VAR)) - 486 else None, - 487 } - 488 - 489 RANGE_PARSERS = { - 490 TokenType.BETWEEN: lambda self, this: self._parse_between(this), - 491 TokenType.GLOB: lambda self, this: self._parse_escape( - 492 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) - 493 ), - 494 TokenType.IN: lambda self, this: self._parse_in(this), - 495 TokenType.IS: lambda self, this: self._parse_is(this), - 496 TokenType.LIKE: lambda self, this: self._parse_escape( - 497 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) - 498 ), - 499 TokenType.ILIKE: lambda self, this: self._parse_escape( - 500 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) - 501 ), - 502 TokenType.IRLIKE: lambda self, this: self.expression( - 503 exp.RegexpILike, this=this, expression=self._parse_bitwise() + 398 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( + 399 exp.JSONBExtractScalar, + 400 this=this, + 401 expression=path, + 402 ), + 403 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( + 404 exp.JSONBContains, + 405 this=this, + 406 expression=key, + 407 ), + 408 } + 409 + 410 EXPRESSION_PARSERS = { + 411 exp.Column: lambda self: self._parse_column(), + 412 exp.DataType: lambda self: self._parse_types(), + 413 exp.From: lambda self: self._parse_from(), + 414 exp.Group: lambda self: self._parse_group(), + 415 exp.Identifier: lambda self: self._parse_id_var(), + 416 exp.Lateral: lambda self: self._parse_lateral(), + 417 exp.Join: lambda self: self._parse_join(), + 418 exp.Order: lambda self: self._parse_order(), + 419 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), + 420 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), + 421 exp.Lambda: lambda self: self._parse_lambda(), + 422 exp.Limit: lambda self: self._parse_limit(), + 423 exp.Offset: lambda self: self._parse_offset(), + 424 exp.TableAlias: lambda self: self._parse_table_alias(), + 425 exp.Table: lambda self: self._parse_table(), + 426 exp.Condition: lambda self: self._parse_conjunction(), + 427 exp.Expression: lambda self: self._parse_statement(), + 428 exp.Properties: lambda self: self._parse_properties(), + 429 exp.Where: lambda self: self._parse_where(), + 430 exp.Ordered: lambda self: self._parse_ordered(), + 431 exp.Having: lambda self: self._parse_having(), + 432 exp.With: lambda self: self._parse_with(), + 433 exp.Window: lambda self: self._parse_named_window(), + 434 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), + 435 } + 436 + 437 STATEMENT_PARSERS = { + 438 TokenType.ALTER: lambda self: self._parse_alter(), + 439 TokenType.BEGIN: lambda self: self._parse_transaction(), + 440 TokenType.CACHE: lambda self: self._parse_cache(), + 441 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), + 442 TokenType.COMMENT: lambda self: self._parse_comment(), + 443 TokenType.CREATE: lambda self: self._parse_create(), + 444 TokenType.DELETE: lambda self: self._parse_delete(), + 445 TokenType.DESC: lambda self: self._parse_describe(), + 446 TokenType.DESCRIBE: lambda self: self._parse_describe(), + 447 TokenType.DROP: lambda self: self._parse_drop(), + 448 TokenType.END: lambda self: self._parse_commit_or_rollback(), + 449 TokenType.INSERT: lambda self: self._parse_insert(), + 450 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), + 451 TokenType.MERGE: lambda self: self._parse_merge(), + 452 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), + 453 TokenType.UNCACHE: lambda self: self._parse_uncache(), + 454 TokenType.UPDATE: lambda self: self._parse_update(), + 455 TokenType.USE: lambda self: self.expression( + 456 exp.Use, + 457 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) + 458 and exp.Var(this=self._prev.text), + 459 this=self._parse_table(schema=False), + 460 ), + 461 } + 462 + 463 UNARY_PARSERS = { + 464 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op + 465 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), + 466 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), + 467 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), + 468 } + 469 + 470 PRIMARY_PARSERS = { + 471 TokenType.STRING: lambda self, token: self.expression( + 472 exp.Literal, this=token.text, is_string=True + 473 ), + 474 TokenType.NUMBER: lambda self, token: self.expression( + 475 exp.Literal, this=token.text, is_string=False + 476 ), + 477 TokenType.STAR: lambda self, _: self.expression( + 478 exp.Star, + 479 **{"except": self._parse_except(), "replace": self._parse_replace()}, + 480 ), + 481 TokenType.NULL: lambda self, _: self.expression(exp.Null), + 482 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), + 483 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), + 484 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), + 485 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), + 486 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), + 487 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), + 488 TokenType.NATIONAL: lambda self, token: self._parse_national(token), + 489 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), + 490 } + 491 + 492 PLACEHOLDER_PARSERS = { + 493 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), + 494 TokenType.PARAMETER: lambda self: self._parse_parameter(), + 495 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) + 496 if self._match_set((TokenType.NUMBER, TokenType.VAR)) + 497 else None, + 498 } + 499 + 500 RANGE_PARSERS = { + 501 TokenType.BETWEEN: lambda self, this: self._parse_between(this), + 502 TokenType.GLOB: lambda self, this: self._parse_escape( + 503 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 504 ), - 505 TokenType.RLIKE: lambda self, this: self.expression( - 506 exp.RegexpLike, this=this, expression=self._parse_bitwise() + 505 TokenType.OVERLAPS: lambda self, this: self._parse_escape( + 506 self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) 507 ), - 508 TokenType.SIMILAR_TO: lambda self, this: self.expression( - 509 exp.SimilarTo, this=this, expression=self._parse_bitwise() - 510 ), - 511 } - 512 - 513 PROPERTY_PARSERS = { - 514 "AFTER": lambda self: self._parse_afterjournal( - 515 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" - 516 ), - 517 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), - 518 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), - 519 "BEFORE": lambda self: self._parse_journal( - 520 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 508 TokenType.IN: lambda self, this: self._parse_in(this), + 509 TokenType.IS: lambda self, this: self._parse_is(this), + 510 TokenType.LIKE: lambda self, this: self._parse_escape( + 511 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) + 512 ), + 513 TokenType.ILIKE: lambda self, this: self._parse_escape( + 514 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) + 515 ), + 516 TokenType.IRLIKE: lambda self, this: self.expression( + 517 exp.RegexpILike, this=this, expression=self._parse_bitwise() + 518 ), + 519 TokenType.RLIKE: lambda self, this: self.expression( + 520 exp.RegexpLike, this=this, expression=self._parse_bitwise() 521 ), - 522 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), - 523 "CHARACTER SET": lambda self: self._parse_character_set(), - 524 "CHECKSUM": lambda self: self._parse_checksum(), - 525 "CLUSTER BY": lambda self: self.expression( - 526 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) - 527 ), - 528 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), - 529 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), - 530 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( - 531 default=self._prev.text.upper() == "DEFAULT" - 532 ), - 533 "DEFINER": lambda self: self._parse_definer(), - 534 "DETERMINISTIC": lambda self: self.expression( - 535 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") - 536 ), - 537 "DISTKEY": lambda self: self._parse_distkey(), - 538 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), - 539 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), - 540 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), - 541 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), - 542 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), - 543 "FREESPACE": lambda self: self._parse_freespace(), - 544 "GLOBAL": lambda self: self._parse_temporary(global_=True), - 545 "IMMUTABLE": lambda self: self.expression( - 546 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") - 547 ), - 548 "JOURNAL": lambda self: self._parse_journal( - 549 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 522 TokenType.SIMILAR_TO: lambda self, this: self.expression( + 523 exp.SimilarTo, this=this, expression=self._parse_bitwise() + 524 ), + 525 } + 526 + 527 PROPERTY_PARSERS = { + 528 "AFTER": lambda self: self._parse_afterjournal( + 529 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 530 ), + 531 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), + 532 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), + 533 "BEFORE": lambda self: self._parse_journal( + 534 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 535 ), + 536 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), + 537 "CHARACTER SET": lambda self: self._parse_character_set(), + 538 "CHECKSUM": lambda self: self._parse_checksum(), + 539 "CLUSTER BY": lambda self: self.expression( + 540 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) + 541 ), + 542 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), + 543 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), + 544 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( + 545 default=self._prev.text.upper() == "DEFAULT" + 546 ), + 547 "DEFINER": lambda self: self._parse_definer(), + 548 "DETERMINISTIC": lambda self: self.expression( + 549 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 550 ), - 551 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), - 552 "LIKE": lambda self: self._parse_create_like(), - 553 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), - 554 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), - 555 "LOCK": lambda self: self._parse_locking(), - 556 "LOCKING": lambda self: self._parse_locking(), - 557 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), - 558 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), - 559 "MAX": lambda self: self._parse_datablocksize(), - 560 "MAXIMUM": lambda self: self._parse_datablocksize(), - 561 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( - 562 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" - 563 ), - 564 "MIN": lambda self: self._parse_datablocksize(), - 565 "MINIMUM": lambda self: self._parse_datablocksize(), - 566 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), - 567 "NO": lambda self: self._parse_noprimaryindex(), - 568 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), - 569 "ON": lambda self: self._parse_oncommit(), - 570 "PARTITION BY": lambda self: self._parse_partitioned_by(), - 571 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), - 572 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), - 573 "RETURNS": lambda self: self._parse_returns(), - 574 "ROW": lambda self: self._parse_row(), - 575 "SET": lambda self: self.expression(exp.SetProperty, multi=False), - 576 "SORTKEY": lambda self: self._parse_sortkey(), - 577 "STABLE": lambda self: self.expression( - 578 exp.VolatilityProperty, this=exp.Literal.string("STABLE") - 579 ), - 580 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), - 581 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), - 582 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), - 583 "TEMPORARY": lambda self: self._parse_temporary(global_=False), - 584 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), - 585 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), - 586 "VOLATILE": lambda self: self.expression( - 587 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") - 588 ), - 589 "WITH": lambda self: self._parse_with_property(), - 590 } - 591 - 592 CONSTRAINT_PARSERS = { - 593 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), - 594 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), - 595 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), - 596 "CHARACTER SET": lambda self: self.expression( - 597 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() - 598 ), - 599 "CHECK": lambda self: self.expression( - 600 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) - 601 ), - 602 "COLLATE": lambda self: self.expression( - 603 exp.CollateColumnConstraint, this=self._parse_var() - 604 ), - 605 "COMMENT": lambda self: self.expression( - 606 exp.CommentColumnConstraint, this=self._parse_string() - 607 ), - 608 "COMPRESS": lambda self: self._parse_compress(), - 609 "DEFAULT": lambda self: self.expression( - 610 exp.DefaultColumnConstraint, this=self._parse_bitwise() - 611 ), - 612 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), - 613 "FOREIGN KEY": lambda self: self._parse_foreign_key(), - 614 "FORMAT": lambda self: self.expression( - 615 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() - 616 ), - 617 "GENERATED": lambda self: self._parse_generated_as_identity(), - 618 "IDENTITY": lambda self: self._parse_auto_increment(), - 619 "INLINE": lambda self: self._parse_inline(), - 620 "LIKE": lambda self: self._parse_create_like(), - 621 "NOT": lambda self: self._parse_not_constraint(), - 622 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), - 623 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), - 624 "PRIMARY KEY": lambda self: self._parse_primary_key(), - 625 "TITLE": lambda self: self.expression( - 626 exp.TitleColumnConstraint, this=self._parse_var_or_string() - 627 ), - 628 "UNIQUE": lambda self: self._parse_unique(), - 629 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), - 630 } - 631 - 632 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} - 633 - 634 NO_PAREN_FUNCTION_PARSERS = { - 635 TokenType.CASE: lambda self: self._parse_case(), - 636 TokenType.IF: lambda self: self._parse_if(), - 637 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), - 638 } - 639 - 640 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { - 641 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), - 642 "TRY_CONVERT": lambda self: self._parse_convert(False), - 643 "EXTRACT": lambda self: self._parse_extract(), - 644 "POSITION": lambda self: self._parse_position(), - 645 "SUBSTRING": lambda self: self._parse_substring(), - 646 "TRIM": lambda self: self._parse_trim(), - 647 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), - 648 "TRY_CAST": lambda self: self._parse_cast(False), - 649 "STRING_AGG": lambda self: self._parse_string_agg(), - 650 } - 651 - 652 QUERY_MODIFIER_PARSERS = { - 653 "match": lambda self: self._parse_match_recognize(), - 654 "where": lambda self: self._parse_where(), - 655 "group": lambda self: self._parse_group(), - 656 "having": lambda self: self._parse_having(), - 657 "qualify": lambda self: self._parse_qualify(), - 658 "windows": lambda self: self._parse_window_clause(), - 659 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), - 660 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), - 661 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), - 662 "order": lambda self: self._parse_order(), - 663 "limit": lambda self: self._parse_limit(), - 664 "offset": lambda self: self._parse_offset(), - 665 "lock": lambda self: self._parse_lock(), - 666 } - 667 - 668 SHOW_PARSERS: t.Dict[str, t.Callable] = {} - 669 SET_PARSERS: t.Dict[str, t.Callable] = {} - 670 - 671 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) - 672 - 673 CREATABLES = { - 674 TokenType.COLUMN, - 675 TokenType.FUNCTION, - 676 TokenType.INDEX, - 677 TokenType.PROCEDURE, - 678 TokenType.SCHEMA, - 679 TokenType.TABLE, - 680 TokenType.VIEW, - 681 } - 682 - 683 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} - 684 - 685 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} - 686 - 687 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} - 688 - 689 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} - 690 - 691 STRICT_CAST = True + 551 "DISTKEY": lambda self: self._parse_distkey(), + 552 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), + 553 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), + 554 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), + 555 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), + 556 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), + 557 "FREESPACE": lambda self: self._parse_freespace(), + 558 "GLOBAL": lambda self: self._parse_temporary(global_=True), + 559 "IMMUTABLE": lambda self: self.expression( + 560 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") + 561 ), + 562 "JOURNAL": lambda self: self._parse_journal( + 563 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" + 564 ), + 565 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), + 566 "LIKE": lambda self: self._parse_create_like(), + 567 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), + 568 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), + 569 "LOCK": lambda self: self._parse_locking(), + 570 "LOCKING": lambda self: self._parse_locking(), + 571 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), + 572 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), + 573 "MAX": lambda self: self._parse_datablocksize(), + 574 "MAXIMUM": lambda self: self._parse_datablocksize(), + 575 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( + 576 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" + 577 ), + 578 "MIN": lambda self: self._parse_datablocksize(), + 579 "MINIMUM": lambda self: self._parse_datablocksize(), + 580 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), + 581 "NO": lambda self: self._parse_noprimaryindex(), + 582 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), + 583 "ON": lambda self: self._parse_oncommit(), + 584 "PARTITION BY": lambda self: self._parse_partitioned_by(), + 585 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), + 586 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), + 587 "RETURNS": lambda self: self._parse_returns(), + 588 "ROW": lambda self: self._parse_row(), + 589 "SET": lambda self: self.expression(exp.SetProperty, multi=False), + 590 "SORTKEY": lambda self: self._parse_sortkey(), + 591 "STABLE": lambda self: self.expression( + 592 exp.VolatilityProperty, this=exp.Literal.string("STABLE") + 593 ), + 594 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), + 595 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), + 596 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), + 597 "TEMPORARY": lambda self: self._parse_temporary(global_=False), + 598 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), + 599 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), + 600 "VOLATILE": lambda self: self.expression( + 601 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") + 602 ), + 603 "WITH": lambda self: self._parse_with_property(), + 604 } + 605 + 606 CONSTRAINT_PARSERS = { + 607 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), + 608 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), + 609 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), + 610 "CHARACTER SET": lambda self: self.expression( + 611 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() + 612 ), + 613 "CHECK": lambda self: self.expression( + 614 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) + 615 ), + 616 "COLLATE": lambda self: self.expression( + 617 exp.CollateColumnConstraint, this=self._parse_var() + 618 ), + 619 "COMMENT": lambda self: self.expression( + 620 exp.CommentColumnConstraint, this=self._parse_string() + 621 ), + 622 "COMPRESS": lambda self: self._parse_compress(), + 623 "DEFAULT": lambda self: self.expression( + 624 exp.DefaultColumnConstraint, this=self._parse_bitwise() + 625 ), + 626 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), + 627 "FOREIGN KEY": lambda self: self._parse_foreign_key(), + 628 "FORMAT": lambda self: self.expression( + 629 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() + 630 ), + 631 "GENERATED": lambda self: self._parse_generated_as_identity(), + 632 "IDENTITY": lambda self: self._parse_auto_increment(), + 633 "INLINE": lambda self: self._parse_inline(), + 634 "LIKE": lambda self: self._parse_create_like(), + 635 "NOT": lambda self: self._parse_not_constraint(), + 636 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), + 637 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), + 638 "PRIMARY KEY": lambda self: self._parse_primary_key(), + 639 "TITLE": lambda self: self.expression( + 640 exp.TitleColumnConstraint, this=self._parse_var_or_string() + 641 ), + 642 "UNIQUE": lambda self: self._parse_unique(), + 643 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), + 644 } + 645 + 646 ALTER_PARSERS = { + 647 "ADD": lambda self: self._parse_alter_table_add(), + 648 "ALTER": lambda self: self._parse_alter_table_alter(), + 649 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), + 650 "DROP": lambda self: self._parse_alter_table_drop(), + 651 "RENAME": lambda self: self._parse_alter_table_rename(), + 652 } + 653 + 654 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} + 655 + 656 NO_PAREN_FUNCTION_PARSERS = { + 657 TokenType.CASE: lambda self: self._parse_case(), + 658 TokenType.IF: lambda self: self._parse_if(), + 659 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), + 660 } + 661 + 662 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { + 663 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), + 664 "TRY_CONVERT": lambda self: self._parse_convert(False), + 665 "EXTRACT": lambda self: self._parse_extract(), + 666 "POSITION": lambda self: self._parse_position(), + 667 "SUBSTRING": lambda self: self._parse_substring(), + 668 "TRIM": lambda self: self._parse_trim(), + 669 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), + 670 "TRY_CAST": lambda self: self._parse_cast(False), + 671 "STRING_AGG": lambda self: self._parse_string_agg(), + 672 } + 673 + 674 QUERY_MODIFIER_PARSERS = { + 675 "match": lambda self: self._parse_match_recognize(), + 676 "where": lambda self: self._parse_where(), + 677 "group": lambda self: self._parse_group(), + 678 "having": lambda self: self._parse_having(), + 679 "qualify": lambda self: self._parse_qualify(), + 680 "windows": lambda self: self._parse_window_clause(), + 681 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), + 682 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), + 683 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), + 684 "order": lambda self: self._parse_order(), + 685 "limit": lambda self: self._parse_limit(), + 686 "offset": lambda self: self._parse_offset(), + 687 "lock": lambda self: self._parse_lock(), + 688 } + 689 + 690 SHOW_PARSERS: t.Dict[str, t.Callable] = {} + 691 SET_PARSERS: t.Dict[str, t.Callable] = {} 692 - 693 __slots__ = ( - 694 "error_level", - 695 "error_message_context", - 696 "sql", - 697 "errors", - 698 "index_offset", - 699 "unnest_column_only", - 700 "alias_post_tablesample", - 701 "max_errors", - 702 "null_ordering", - 703 "_tokens", - 704 "_index", - 705 "_curr", - 706 "_next", - 707 "_prev", - 708 "_prev_comments", - 709 "_show_trie", - 710 "_set_trie", - 711 ) - 712 - 713 def __init__( - 714 self, - 715 error_level: t.Optional[ErrorLevel] = None, - 716 error_message_context: int = 100, - 717 index_offset: int = 0, - 718 unnest_column_only: bool = False, - 719 alias_post_tablesample: bool = False, - 720 max_errors: int = 3, - 721 null_ordering: t.Optional[str] = None, - 722 ): - 723 self.error_level = error_level or ErrorLevel.IMMEDIATE - 724 self.error_message_context = error_message_context - 725 self.index_offset = index_offset - 726 self.unnest_column_only = unnest_column_only - 727 self.alias_post_tablesample = alias_post_tablesample - 728 self.max_errors = max_errors - 729 self.null_ordering = null_ordering - 730 self.reset() - 731 - 732 def reset(self): - 733 self.sql = "" - 734 self.errors = [] - 735 self._tokens = [] - 736 self._index = 0 - 737 self._curr = None - 738 self._next = None - 739 self._prev = None - 740 self._prev_comments = None - 741 - 742 def parse( - 743 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None - 744 ) -> t.List[t.Optional[exp.Expression]]: - 745 """ - 746 Parses a list of tokens and returns a list of syntax trees, one tree - 747 per parsed SQL statement. - 748 - 749 Args: - 750 raw_tokens: the list of tokens. - 751 sql: the original SQL string, used to produce helpful debug messages. - 752 - 753 Returns: - 754 The list of syntax trees. - 755 """ - 756 return self._parse( - 757 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql - 758 ) - 759 - 760 def parse_into( - 761 self, - 762 expression_types: exp.IntoType, - 763 raw_tokens: t.List[Token], - 764 sql: t.Optional[str] = None, - 765 ) -> t.List[t.Optional[exp.Expression]]: - 766 """ - 767 Parses a list of tokens into a given Expression type. If a collection of Expression - 768 types is given instead, this method will try to parse the token list into each one - 769 of them, stopping at the first for which the parsing succeeds. - 770 - 771 Args: - 772 expression_types: the expression type(s) to try and parse the token list into. - 773 raw_tokens: the list of tokens. - 774 sql: the original SQL string, used to produce helpful debug messages. - 775 - 776 Returns: - 777 The target Expression. - 778 """ - 779 errors = [] - 780 for expression_type in ensure_collection(expression_types): - 781 parser = self.EXPRESSION_PARSERS.get(expression_type) - 782 if not parser: - 783 raise TypeError(f"No parser registered for {expression_type}") - 784 try: - 785 return self._parse(parser, raw_tokens, sql) - 786 except ParseError as e: - 787 e.errors[0]["into_expression"] = expression_type - 788 errors.append(e) - 789 raise ParseError( - 790 f"Failed to parse into {expression_types}", - 791 errors=merge_errors(errors), - 792 ) from errors[-1] - 793 - 794 def _parse( - 795 self, - 796 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], - 797 raw_tokens: t.List[Token], - 798 sql: t.Optional[str] = None, - 799 ) -> t.List[t.Optional[exp.Expression]]: - 800 self.reset() - 801 self.sql = sql or "" - 802 total = len(raw_tokens) - 803 chunks: t.List[t.List[Token]] = [[]] - 804 - 805 for i, token in enumerate(raw_tokens): - 806 if token.token_type == TokenType.SEMICOLON: - 807 if i < total - 1: - 808 chunks.append([]) - 809 else: - 810 chunks[-1].append(token) - 811 - 812 expressions = [] - 813 - 814 for tokens in chunks: - 815 self._index = -1 - 816 self._tokens = tokens - 817 self._advance() - 818 - 819 expressions.append(parse_method(self)) - 820 - 821 if self._index < len(self._tokens): - 822 self.raise_error("Invalid expression / Unexpected token") + 693 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) + 694 + 695 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} + 696 + 697 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} + 698 + 699 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} + 700 + 701 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} + 702 + 703 STRICT_CAST = True + 704 + 705 __slots__ = ( + 706 "error_level", + 707 "error_message_context", + 708 "sql", + 709 "errors", + 710 "index_offset", + 711 "unnest_column_only", + 712 "alias_post_tablesample", + 713 "max_errors", + 714 "null_ordering", + 715 "_tokens", + 716 "_index", + 717 "_curr", + 718 "_next", + 719 "_prev", + 720 "_prev_comments", + 721 "_show_trie", + 722 "_set_trie", + 723 ) + 724 + 725 def __init__( + 726 self, + 727 error_level: t.Optional[ErrorLevel] = None, + 728 error_message_context: int = 100, + 729 index_offset: int = 0, + 730 unnest_column_only: bool = False, + 731 alias_post_tablesample: bool = False, + 732 max_errors: int = 3, + 733 null_ordering: t.Optional[str] = None, + 734 ): + 735 self.error_level = error_level or ErrorLevel.IMMEDIATE + 736 self.error_message_context = error_message_context + 737 self.index_offset = index_offset + 738 self.unnest_column_only = unnest_column_only + 739 self.alias_post_tablesample = alias_post_tablesample + 740 self.max_errors = max_errors + 741 self.null_ordering = null_ordering + 742 self.reset() + 743 + 744 def reset(self): + 745 self.sql = "" + 746 self.errors = [] + 747 self._tokens = [] + 748 self._index = 0 + 749 self._curr = None + 750 self._next = None + 751 self._prev = None + 752 self._prev_comments = None + 753 + 754 def parse( + 755 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None + 756 ) -> t.List[t.Optional[exp.Expression]]: + 757 """ + 758 Parses a list of tokens and returns a list of syntax trees, one tree + 759 per parsed SQL statement. + 760 + 761 Args: + 762 raw_tokens: the list of tokens. + 763 sql: the original SQL string, used to produce helpful debug messages. + 764 + 765 Returns: + 766 The list of syntax trees. + 767 """ + 768 return self._parse( + 769 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql + 770 ) + 771 + 772 def parse_into( + 773 self, + 774 expression_types: exp.IntoType, + 775 raw_tokens: t.List[Token], + 776 sql: t.Optional[str] = None, + 777 ) -> t.List[t.Optional[exp.Expression]]: + 778 """ + 779 Parses a list of tokens into a given Expression type. If a collection of Expression + 780 types is given instead, this method will try to parse the token list into each one + 781 of them, stopping at the first for which the parsing succeeds. + 782 + 783 Args: + 784 expression_types: the expression type(s) to try and parse the token list into. + 785 raw_tokens: the list of tokens. + 786 sql: the original SQL string, used to produce helpful debug messages. + 787 + 788 Returns: + 789 The target Expression. + 790 """ + 791 errors = [] + 792 for expression_type in ensure_collection(expression_types): + 793 parser = self.EXPRESSION_PARSERS.get(expression_type) + 794 if not parser: + 795 raise TypeError(f"No parser registered for {expression_type}") + 796 try: + 797 return self._parse(parser, raw_tokens, sql) + 798 except ParseError as e: + 799 e.errors[0]["into_expression"] = expression_type + 800 errors.append(e) + 801 raise ParseError( + 802 f"Failed to parse into {expression_types}", + 803 errors=merge_errors(errors), + 804 ) from errors[-1] + 805 + 806 def _parse( + 807 self, + 808 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], + 809 raw_tokens: t.List[Token], + 810 sql: t.Optional[str] = None, + 811 ) -> t.List[t.Optional[exp.Expression]]: + 812 self.reset() + 813 self.sql = sql or "" + 814 total = len(raw_tokens) + 815 chunks: t.List[t.List[Token]] = [[]] + 816 + 817 for i, token in enumerate(raw_tokens): + 818 if token.token_type == TokenType.SEMICOLON: + 819 if i < total - 1: + 820 chunks.append([]) + 821 else: + 822 chunks[-1].append(token) 823 - 824 self.check_errors() + 824 expressions = [] 825 - 826 return expressions - 827 - 828 def check_errors(self) -> None: - 829 """ - 830 Logs or raises any found errors, depending on the chosen error level setting. - 831 """ - 832 if self.error_level == ErrorLevel.WARN: - 833 for error in self.errors: - 834 logger.error(str(error)) - 835 elif self.error_level == ErrorLevel.RAISE and self.errors: - 836 raise ParseError( - 837 concat_messages(self.errors, self.max_errors), - 838 errors=merge_errors(self.errors), - 839 ) - 840 - 841 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: - 842 """ - 843 Appends an error in the list of recorded errors or raises it, depending on the chosen - 844 error level setting. - 845 """ - 846 token = token or self._curr or self._prev or Token.string("") - 847 start = self._find_token(token) - 848 end = start + len(token.text) - 849 start_context = self.sql[max(start - self.error_message_context, 0) : start] - 850 highlight = self.sql[start:end] - 851 end_context = self.sql[end : end + self.error_message_context] + 826 for tokens in chunks: + 827 self._index = -1 + 828 self._tokens = tokens + 829 self._advance() + 830 + 831 expressions.append(parse_method(self)) + 832 + 833 if self._index < len(self._tokens): + 834 self.raise_error("Invalid expression / Unexpected token") + 835 + 836 self.check_errors() + 837 + 838 return expressions + 839 + 840 def check_errors(self) -> None: + 841 """ + 842 Logs or raises any found errors, depending on the chosen error level setting. + 843 """ + 844 if self.error_level == ErrorLevel.WARN: + 845 for error in self.errors: + 846 logger.error(str(error)) + 847 elif self.error_level == ErrorLevel.RAISE and self.errors: + 848 raise ParseError( + 849 concat_messages(self.errors, self.max_errors), + 850 errors=merge_errors(self.errors), + 851 ) 852 - 853 error = ParseError.new( - 854 f"{message}. Line {token.line}, Col: {token.col}.\n" - 855 f" {start_context}\033[4m{highlight}\033[0m{end_context}", - 856 description=message, - 857 line=token.line, - 858 col=token.col, - 859 start_context=start_context, - 860 highlight=highlight, - 861 end_context=end_context, - 862 ) - 863 - 864 if self.error_level == ErrorLevel.IMMEDIATE: - 865 raise error - 866 - 867 self.errors.append(error) - 868 - 869 def expression( - 870 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs - 871 ) -> exp.Expression: - 872 """ - 873 Creates a new, validated Expression. - 874 - 875 Args: - 876 exp_class: the expression class to instantiate. - 877 comments: an optional list of comments to attach to the expression. - 878 kwargs: the arguments to set for the expression along with their respective values. - 879 - 880 Returns: - 881 The target expression. - 882 """ - 883 instance = exp_class(**kwargs) - 884 if self._prev_comments: - 885 instance.comments = self._prev_comments - 886 self._prev_comments = None - 887 if comments: - 888 instance.comments = comments - 889 self.validate_expression(instance) - 890 return instance + 853 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: + 854 """ + 855 Appends an error in the list of recorded errors or raises it, depending on the chosen + 856 error level setting. + 857 """ + 858 token = token or self._curr or self._prev or Token.string("") + 859 start = self._find_token(token) + 860 end = start + len(token.text) + 861 start_context = self.sql[max(start - self.error_message_context, 0) : start] + 862 highlight = self.sql[start:end] + 863 end_context = self.sql[end : end + self.error_message_context] + 864 + 865 error = ParseError.new( + 866 f"{message}. Line {token.line}, Col: {token.col}.\n" + 867 f" {start_context}\033[4m{highlight}\033[0m{end_context}", + 868 description=message, + 869 line=token.line, + 870 col=token.col, + 871 start_context=start_context, + 872 highlight=highlight, + 873 end_context=end_context, + 874 ) + 875 + 876 if self.error_level == ErrorLevel.IMMEDIATE: + 877 raise error + 878 + 879 self.errors.append(error) + 880 + 881 def expression( + 882 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs + 883 ) -> exp.Expression: + 884 """ + 885 Creates a new, validated Expression. + 886 + 887 Args: + 888 exp_class: the expression class to instantiate. + 889 comments: an optional list of comments to attach to the expression. + 890 kwargs: the arguments to set for the expression along with their respective values. 891 - 892 def validate_expression( - 893 self, expression: exp.Expression, args: t.Optional[t.List] = None - 894 ) -> None: - 895 """ - 896 Validates an already instantiated expression, making sure that all its mandatory arguments - 897 are set. - 898 - 899 Args: - 900 expression: the expression to validate. - 901 args: an optional list of items that was used to instantiate the expression, if it's a Func. - 902 """ - 903 if self.error_level == ErrorLevel.IGNORE: - 904 return - 905 - 906 for error_message in expression.error_messages(args): - 907 self.raise_error(error_message) - 908 - 909 def _find_sql(self, start: Token, end: Token) -> str: - 910 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] - 911 - 912 def _find_token(self, token: Token) -> int: - 913 line = 1 - 914 col = 1 - 915 index = 0 - 916 - 917 while line < token.line or col < token.col: - 918 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: - 919 line += 1 - 920 col = 1 - 921 else: - 922 col += 1 - 923 index += 1 - 924 - 925 return index - 926 - 927 def _advance(self, times: int = 1) -> None: - 928 self._index += times - 929 self._curr = seq_get(self._tokens, self._index) - 930 self._next = seq_get(self._tokens, self._index + 1) - 931 if self._index > 0: - 932 self._prev = self._tokens[self._index - 1] - 933 self._prev_comments = self._prev.comments - 934 else: - 935 self._prev = None - 936 self._prev_comments = None - 937 - 938 def _retreat(self, index: int) -> None: - 939 self._advance(index - self._index) - 940 - 941 def _parse_command(self) -> exp.Expression: - 942 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) - 943 - 944 def _parse_statement(self) -> t.Optional[exp.Expression]: - 945 if self._curr is None: - 946 return None - 947 - 948 if self._match_set(self.STATEMENT_PARSERS): - 949 return self.STATEMENT_PARSERS[self._prev.token_type](self) - 950 - 951 if self._match_set(Tokenizer.COMMANDS): - 952 return self._parse_command() - 953 - 954 expression = self._parse_expression() - 955 expression = self._parse_set_operations(expression) if expression else self._parse_select() - 956 - 957 self._parse_query_modifiers(expression) - 958 return expression + 892 Returns: + 893 The target expression. + 894 """ + 895 instance = exp_class(**kwargs) + 896 if self._prev_comments: + 897 instance.comments = self._prev_comments + 898 self._prev_comments = None + 899 if comments: + 900 instance.comments = comments + 901 self.validate_expression(instance) + 902 return instance + 903 + 904 def validate_expression( + 905 self, expression: exp.Expression, args: t.Optional[t.List] = None + 906 ) -> None: + 907 """ + 908 Validates an already instantiated expression, making sure that all its mandatory arguments + 909 are set. + 910 + 911 Args: + 912 expression: the expression to validate. + 913 args: an optional list of items that was used to instantiate the expression, if it's a Func. + 914 """ + 915 if self.error_level == ErrorLevel.IGNORE: + 916 return + 917 + 918 for error_message in expression.error_messages(args): + 919 self.raise_error(error_message) + 920 + 921 def _find_sql(self, start: Token, end: Token) -> str: + 922 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] + 923 + 924 def _find_token(self, token: Token) -> int: + 925 line = 1 + 926 col = 1 + 927 index = 0 + 928 + 929 while line < token.line or col < token.col: + 930 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: + 931 line += 1 + 932 col = 1 + 933 else: + 934 col += 1 + 935 index += 1 + 936 + 937 return index + 938 + 939 def _advance(self, times: int = 1) -> None: + 940 self._index += times + 941 self._curr = seq_get(self._tokens, self._index) + 942 self._next = seq_get(self._tokens, self._index + 1) + 943 if self._index > 0: + 944 self._prev = self._tokens[self._index - 1] + 945 self._prev_comments = self._prev.comments + 946 else: + 947 self._prev = None + 948 self._prev_comments = None + 949 + 950 def _retreat(self, index: int) -> None: + 951 self._advance(index - self._index) + 952 + 953 def _parse_command(self) -> exp.Expression: + 954 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) + 955 + 956 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: + 957 start = self._prev + 958 exists = self._parse_exists() if allow_exists else None 959 - 960 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: - 961 start = self._prev - 962 temporary = self._match(TokenType.TEMPORARY) - 963 materialized = self._match(TokenType.MATERIALIZED) - 964 kind = self._match_set(self.CREATABLES) and self._prev.text - 965 if not kind: - 966 if default_kind: - 967 kind = default_kind - 968 else: - 969 return self._parse_as_command(start) - 970 - 971 return self.expression( - 972 exp.Drop, - 973 exists=self._parse_exists(), - 974 this=self._parse_table(schema=True), - 975 kind=kind, - 976 temporary=temporary, - 977 materialized=materialized, - 978 cascade=self._match(TokenType.CASCADE), - 979 ) - 980 - 981 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: - 982 return ( - 983 self._match(TokenType.IF) - 984 and (not not_ or self._match(TokenType.NOT)) - 985 and self._match(TokenType.EXISTS) - 986 ) - 987 - 988 def _parse_create(self) -> t.Optional[exp.Expression]: - 989 start = self._prev - 990 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( - 991 TokenType.OR, TokenType.REPLACE - 992 ) - 993 unique = self._match(TokenType.UNIQUE) + 960 self._match(TokenType.ON) + 961 + 962 kind = self._match_set(self.CREATABLES) and self._prev + 963 + 964 if not kind: + 965 return self._parse_as_command(start) + 966 + 967 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): + 968 this = self._parse_user_defined_function(kind=kind.token_type) + 969 elif kind.token_type == TokenType.TABLE: + 970 this = self._parse_table() + 971 elif kind.token_type == TokenType.COLUMN: + 972 this = self._parse_column() + 973 else: + 974 this = self._parse_id_var() + 975 + 976 self._match(TokenType.IS) + 977 + 978 return self.expression( + 979 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists + 980 ) + 981 + 982 def _parse_statement(self) -> t.Optional[exp.Expression]: + 983 if self._curr is None: + 984 return None + 985 + 986 if self._match_set(self.STATEMENT_PARSERS): + 987 return self.STATEMENT_PARSERS[self._prev.token_type](self) + 988 + 989 if self._match_set(Tokenizer.COMMANDS): + 990 return self._parse_command() + 991 + 992 expression = self._parse_expression() + 993 expression = self._parse_set_operations(expression) if expression else self._parse_select() 994 - 995 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): - 996 self._match(TokenType.TABLE) + 995 self._parse_query_modifiers(expression) + 996 return expression 997 - 998 properties = None - 999 create_token = self._match_set(self.CREATABLES) and self._prev -1000 -1001 if not create_token: -1002 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE -1003 create_token = self._match_set(self.CREATABLES) and self._prev -1004 -1005 if not properties or not create_token: -1006 return self._parse_as_command(start) -1007 -1008 exists = self._parse_exists(not_=True) -1009 this = None -1010 expression = None -1011 indexes = None -1012 no_schema_binding = None -1013 begin = None -1014 -1015 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): -1016 this = self._parse_user_defined_function(kind=create_token.token_type) -1017 temp_properties = self._parse_properties() -1018 if properties and temp_properties: -1019 properties.expressions.extend(temp_properties.expressions) -1020 elif temp_properties: -1021 properties = temp_properties -1022 -1023 self._match(TokenType.ALIAS) -1024 begin = self._match(TokenType.BEGIN) -1025 return_ = self._match_text_seq("RETURN") -1026 expression = self._parse_statement() -1027 -1028 if return_: -1029 expression = self.expression(exp.Return, this=expression) -1030 elif create_token.token_type == TokenType.INDEX: -1031 this = self._parse_index() -1032 elif create_token.token_type in ( -1033 TokenType.TABLE, -1034 TokenType.VIEW, -1035 TokenType.SCHEMA, -1036 ): -1037 table_parts = self._parse_table_parts(schema=True) -1038 -1039 # exp.Properties.Location.POST_NAME -1040 if self._match(TokenType.COMMA): -1041 temp_properties = self._parse_properties(before=True) -1042 if properties and temp_properties: -1043 properties.expressions.extend(temp_properties.expressions) -1044 elif temp_properties: -1045 properties = temp_properties + 998 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: + 999 start = self._prev +1000 temporary = self._match(TokenType.TEMPORARY) +1001 materialized = self._match(TokenType.MATERIALIZED) +1002 kind = self._match_set(self.CREATABLES) and self._prev.text +1003 if not kind: +1004 if default_kind: +1005 kind = default_kind +1006 else: +1007 return self._parse_as_command(start) +1008 +1009 return self.expression( +1010 exp.Drop, +1011 exists=self._parse_exists(), +1012 this=self._parse_table(schema=True), +1013 kind=kind, +1014 temporary=temporary, +1015 materialized=materialized, +1016 cascade=self._match(TokenType.CASCADE), +1017 ) +1018 +1019 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: +1020 return ( +1021 self._match(TokenType.IF) +1022 and (not not_ or self._match(TokenType.NOT)) +1023 and self._match(TokenType.EXISTS) +1024 ) +1025 +1026 def _parse_create(self) -> t.Optional[exp.Expression]: +1027 start = self._prev +1028 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( +1029 TokenType.OR, TokenType.REPLACE +1030 ) +1031 unique = self._match(TokenType.UNIQUE) +1032 volatile = self._match(TokenType.VOLATILE) +1033 +1034 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): +1035 self._match(TokenType.TABLE) +1036 +1037 properties = None +1038 create_token = self._match_set(self.CREATABLES) and self._prev +1039 +1040 if not create_token: +1041 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE +1042 create_token = self._match_set(self.CREATABLES) and self._prev +1043 +1044 if not properties or not create_token: +1045 return self._parse_as_command(start) 1046 -1047 this = self._parse_schema(this=table_parts) -1048 -1049 # exp.Properties.Location.POST_SCHEMA and POST_WITH -1050 temp_properties = self._parse_properties() -1051 if properties and temp_properties: -1052 properties.expressions.extend(temp_properties.expressions) -1053 elif temp_properties: -1054 properties = temp_properties -1055 -1056 self._match(TokenType.ALIAS) -1057 -1058 # exp.Properties.Location.POST_ALIAS -1059 if not ( -1060 self._match(TokenType.SELECT, advance=False) -1061 or self._match(TokenType.WITH, advance=False) -1062 or self._match(TokenType.L_PAREN, advance=False) -1063 ): -1064 temp_properties = self._parse_properties() -1065 if properties and temp_properties: -1066 properties.expressions.extend(temp_properties.expressions) -1067 elif temp_properties: -1068 properties = temp_properties -1069 -1070 expression = self._parse_ddl_select() -1071 -1072 if create_token.token_type == TokenType.TABLE: -1073 # exp.Properties.Location.POST_EXPRESSION -1074 temp_properties = self._parse_properties() -1075 if properties and temp_properties: -1076 properties.expressions.extend(temp_properties.expressions) -1077 elif temp_properties: -1078 properties = temp_properties -1079 -1080 indexes = [] -1081 while True: -1082 index = self._parse_create_table_index() +1047 exists = self._parse_exists(not_=True) +1048 this = None +1049 expression = None +1050 indexes = None +1051 no_schema_binding = None +1052 begin = None +1053 +1054 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): +1055 this = self._parse_user_defined_function(kind=create_token.token_type) +1056 temp_properties = self._parse_properties() +1057 if properties and temp_properties: +1058 properties.expressions.extend(temp_properties.expressions) +1059 elif temp_properties: +1060 properties = temp_properties +1061 +1062 self._match(TokenType.ALIAS) +1063 begin = self._match(TokenType.BEGIN) +1064 return_ = self._match_text_seq("RETURN") +1065 expression = self._parse_statement() +1066 +1067 if return_: +1068 expression = self.expression(exp.Return, this=expression) +1069 elif create_token.token_type == TokenType.INDEX: +1070 this = self._parse_index() +1071 elif create_token.token_type in self.DB_CREATABLES: +1072 table_parts = self._parse_table_parts(schema=True) +1073 +1074 # exp.Properties.Location.POST_NAME +1075 if self._match(TokenType.COMMA): +1076 temp_properties = self._parse_properties(before=True) +1077 if properties and temp_properties: +1078 properties.expressions.extend(temp_properties.expressions) +1079 elif temp_properties: +1080 properties = temp_properties +1081 +1082 this = self._parse_schema(this=table_parts) 1083 -1084 # exp.Properties.Location.POST_INDEX -1085 if self._match(TokenType.PARTITION_BY, advance=False): -1086 temp_properties = self._parse_properties() -1087 if properties and temp_properties: -1088 properties.expressions.extend(temp_properties.expressions) -1089 elif temp_properties: -1090 properties = temp_properties -1091 -1092 if not index: -1093 break -1094 else: -1095 indexes.append(index) -1096 elif create_token.token_type == TokenType.VIEW: -1097 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): -1098 no_schema_binding = True -1099 -1100 return self.expression( -1101 exp.Create, -1102 this=this, -1103 kind=create_token.text, -1104 unique=unique, -1105 expression=expression, -1106 exists=exists, -1107 properties=properties, -1108 replace=replace, -1109 indexes=indexes, -1110 no_schema_binding=no_schema_binding, -1111 begin=begin, -1112 ) -1113 -1114 def _parse_property_before(self) -> t.Optional[exp.Expression]: -1115 self._match(TokenType.COMMA) -1116 -1117 # parsers look to _prev for no/dual/default, so need to consume first -1118 self._match_text_seq("NO") -1119 self._match_text_seq("DUAL") -1120 self._match_text_seq("DEFAULT") -1121 -1122 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): -1123 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) -1124 -1125 return None +1084 # exp.Properties.Location.POST_SCHEMA and POST_WITH +1085 temp_properties = self._parse_properties() +1086 if properties and temp_properties: +1087 properties.expressions.extend(temp_properties.expressions) +1088 elif temp_properties: +1089 properties = temp_properties +1090 +1091 self._match(TokenType.ALIAS) +1092 +1093 # exp.Properties.Location.POST_ALIAS +1094 if not ( +1095 self._match(TokenType.SELECT, advance=False) +1096 or self._match(TokenType.WITH, advance=False) +1097 or self._match(TokenType.L_PAREN, advance=False) +1098 ): +1099 temp_properties = self._parse_properties() +1100 if properties and temp_properties: +1101 properties.expressions.extend(temp_properties.expressions) +1102 elif temp_properties: +1103 properties = temp_properties +1104 +1105 expression = self._parse_ddl_select() +1106 +1107 if create_token.token_type == TokenType.TABLE: +1108 # exp.Properties.Location.POST_EXPRESSION +1109 temp_properties = self._parse_properties() +1110 if properties and temp_properties: +1111 properties.expressions.extend(temp_properties.expressions) +1112 elif temp_properties: +1113 properties = temp_properties +1114 +1115 indexes = [] +1116 while True: +1117 index = self._parse_create_table_index() +1118 +1119 # exp.Properties.Location.POST_INDEX +1120 if self._match(TokenType.PARTITION_BY, advance=False): +1121 temp_properties = self._parse_properties() +1122 if properties and temp_properties: +1123 properties.expressions.extend(temp_properties.expressions) +1124 elif temp_properties: +1125 properties = temp_properties 1126 -1127 def _parse_property(self) -> t.Optional[exp.Expression]: -1128 if self._match_texts(self.PROPERTY_PARSERS): -1129 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) -1130 -1131 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): -1132 return self._parse_character_set(default=True) -1133 -1134 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): -1135 return self._parse_sortkey(compound=True) -1136 -1137 if self._match_text_seq("SQL", "SECURITY"): -1138 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) -1139 -1140 assignment = self._match_pair( -1141 TokenType.VAR, TokenType.EQ, advance=False -1142 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) -1143 -1144 if assignment: -1145 key = self._parse_var_or_string() -1146 self._match(TokenType.EQ) -1147 return self.expression(exp.Property, this=key, value=self._parse_column()) -1148 -1149 return None -1150 -1151 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: -1152 self._match(TokenType.EQ) -1153 self._match(TokenType.ALIAS) -1154 return self.expression( -1155 exp_class, -1156 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), -1157 ) -1158 -1159 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: -1160 properties = [] -1161 -1162 while True: -1163 if before: -1164 identified_property = self._parse_property_before() -1165 else: -1166 identified_property = self._parse_property() -1167 -1168 if not identified_property: -1169 break -1170 for p in ensure_collection(identified_property): -1171 properties.append(p) +1127 if not index: +1128 break +1129 else: +1130 indexes.append(index) +1131 elif create_token.token_type == TokenType.VIEW: +1132 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): +1133 no_schema_binding = True +1134 +1135 return self.expression( +1136 exp.Create, +1137 this=this, +1138 kind=create_token.text, +1139 replace=replace, +1140 unique=unique, +1141 volatile=volatile, +1142 expression=expression, +1143 exists=exists, +1144 properties=properties, +1145 indexes=indexes, +1146 no_schema_binding=no_schema_binding, +1147 begin=begin, +1148 ) +1149 +1150 def _parse_property_before(self) -> t.Optional[exp.Expression]: +1151 self._match(TokenType.COMMA) +1152 +1153 # parsers look to _prev for no/dual/default, so need to consume first +1154 self._match_text_seq("NO") +1155 self._match_text_seq("DUAL") +1156 self._match_text_seq("DEFAULT") +1157 +1158 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): +1159 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) +1160 +1161 return None +1162 +1163 def _parse_property(self) -> t.Optional[exp.Expression]: +1164 if self._match_texts(self.PROPERTY_PARSERS): +1165 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) +1166 +1167 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): +1168 return self._parse_character_set(default=True) +1169 +1170 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): +1171 return self._parse_sortkey(compound=True) 1172 -1173 if properties: -1174 return self.expression(exp.Properties, expressions=properties) +1173 if self._match_text_seq("SQL", "SECURITY"): +1174 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1175 -1176 return None -1177 -1178 def _parse_fallback(self, no=False) -> exp.Expression: -1179 self._match_text_seq("FALLBACK") -1180 return self.expression( -1181 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") -1182 ) -1183 -1184 def _parse_with_property( -1185 self, -1186 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: -1187 self._match(TokenType.WITH) -1188 if self._match(TokenType.L_PAREN, advance=False): -1189 return self._parse_wrapped_csv(self._parse_property) -1190 -1191 if self._match_text_seq("JOURNAL"): -1192 return self._parse_withjournaltable() -1193 -1194 if self._match_text_seq("DATA"): -1195 return self._parse_withdata(no=False) -1196 elif self._match_text_seq("NO", "DATA"): -1197 return self._parse_withdata(no=True) -1198 -1199 if not self._next: -1200 return None -1201 -1202 return self._parse_withisolatedloading() +1176 assignment = self._match_pair( +1177 TokenType.VAR, TokenType.EQ, advance=False +1178 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) +1179 +1180 if assignment: +1181 key = self._parse_var_or_string() +1182 self._match(TokenType.EQ) +1183 return self.expression(exp.Property, this=key, value=self._parse_column()) +1184 +1185 return None +1186 +1187 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: +1188 self._match(TokenType.EQ) +1189 self._match(TokenType.ALIAS) +1190 return self.expression( +1191 exp_class, +1192 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), +1193 ) +1194 +1195 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: +1196 properties = [] +1197 +1198 while True: +1199 if before: +1200 identified_property = self._parse_property_before() +1201 else: +1202 identified_property = self._parse_property() 1203 -1204 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html -1205 def _parse_definer(self) -> t.Optional[exp.Expression]: -1206 self._match(TokenType.EQ) -1207 -1208 user = self._parse_id_var() -1209 self._match(TokenType.PARAMETER) -1210 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) +1204 if not identified_property: +1205 break +1206 for p in ensure_collection(identified_property): +1207 properties.append(p) +1208 +1209 if properties: +1210 return self.expression(exp.Properties, expressions=properties) 1211 -1212 if not user or not host: -1213 return None -1214 -1215 return exp.DefinerProperty(this=f"{user}@{host}") -1216 -1217 def _parse_withjournaltable(self) -> exp.Expression: -1218 self._match(TokenType.TABLE) -1219 self._match(TokenType.EQ) -1220 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) -1221 -1222 def _parse_log(self, no=False) -> exp.Expression: -1223 self._match_text_seq("LOG") -1224 return self.expression(exp.LogProperty, no=no) -1225 -1226 def _parse_journal(self, no=False, dual=False) -> exp.Expression: -1227 before = self._match_text_seq("BEFORE") -1228 self._match_text_seq("JOURNAL") -1229 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) -1230 -1231 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: -1232 self._match_text_seq("NOT") -1233 self._match_text_seq("LOCAL") -1234 self._match_text_seq("AFTER", "JOURNAL") -1235 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) -1236 -1237 def _parse_checksum(self) -> exp.Expression: -1238 self._match_text_seq("CHECKSUM") -1239 self._match(TokenType.EQ) -1240 -1241 on = None -1242 if self._match(TokenType.ON): -1243 on = True -1244 elif self._match_text_seq("OFF"): -1245 on = False -1246 default = self._match(TokenType.DEFAULT) +1212 return None +1213 +1214 def _parse_fallback(self, no=False) -> exp.Expression: +1215 self._match_text_seq("FALLBACK") +1216 return self.expression( +1217 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") +1218 ) +1219 +1220 def _parse_with_property( +1221 self, +1222 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: +1223 self._match(TokenType.WITH) +1224 if self._match(TokenType.L_PAREN, advance=False): +1225 return self._parse_wrapped_csv(self._parse_property) +1226 +1227 if self._match_text_seq("JOURNAL"): +1228 return self._parse_withjournaltable() +1229 +1230 if self._match_text_seq("DATA"): +1231 return self._parse_withdata(no=False) +1232 elif self._match_text_seq("NO", "DATA"): +1233 return self._parse_withdata(no=True) +1234 +1235 if not self._next: +1236 return None +1237 +1238 return self._parse_withisolatedloading() +1239 +1240 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html +1241 def _parse_definer(self) -> t.Optional[exp.Expression]: +1242 self._match(TokenType.EQ) +1243 +1244 user = self._parse_id_var() +1245 self._match(TokenType.PARAMETER) +1246 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1247 -1248 return self.expression( -1249 exp.ChecksumProperty, -1250 on=on, -1251 default=default, -1252 ) -1253 -1254 def _parse_freespace(self) -> exp.Expression: -1255 self._match_text_seq("FREESPACE") -1256 self._match(TokenType.EQ) -1257 return self.expression( -1258 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) -1259 ) -1260 -1261 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: -1262 self._match_text_seq("MERGEBLOCKRATIO") -1263 if self._match(TokenType.EQ): -1264 return self.expression( -1265 exp.MergeBlockRatioProperty, -1266 this=self._parse_number(), -1267 percent=self._match(TokenType.PERCENT), -1268 ) -1269 else: -1270 return self.expression( -1271 exp.MergeBlockRatioProperty, -1272 no=no, -1273 default=default, -1274 ) -1275 -1276 def _parse_datablocksize(self, default=None) -> exp.Expression: -1277 if default: -1278 self._match_text_seq("DATABLOCKSIZE") -1279 return self.expression(exp.DataBlocksizeProperty, default=True) -1280 elif self._match_texts(("MIN", "MINIMUM")): -1281 self._match_text_seq("DATABLOCKSIZE") -1282 return self.expression(exp.DataBlocksizeProperty, min=True) -1283 elif self._match_texts(("MAX", "MAXIMUM")): -1284 self._match_text_seq("DATABLOCKSIZE") -1285 return self.expression(exp.DataBlocksizeProperty, min=False) -1286 -1287 self._match_text_seq("DATABLOCKSIZE") -1288 self._match(TokenType.EQ) -1289 size = self._parse_number() -1290 units = None -1291 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): -1292 units = self._prev.text -1293 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) -1294 -1295 def _parse_blockcompression(self) -> exp.Expression: -1296 self._match_text_seq("BLOCKCOMPRESSION") -1297 self._match(TokenType.EQ) -1298 always = self._match_text_seq("ALWAYS") -1299 manual = self._match_text_seq("MANUAL") -1300 never = self._match_text_seq("NEVER") -1301 default = self._match_text_seq("DEFAULT") -1302 autotemp = None -1303 if self._match_text_seq("AUTOTEMP"): -1304 autotemp = self._parse_schema() -1305 -1306 return self.expression( -1307 exp.BlockCompressionProperty, -1308 always=always, -1309 manual=manual, -1310 never=never, -1311 default=default, -1312 autotemp=autotemp, -1313 ) -1314 -1315 def _parse_withisolatedloading(self) -> exp.Expression: -1316 no = self._match_text_seq("NO") -1317 concurrent = self._match_text_seq("CONCURRENT") -1318 self._match_text_seq("ISOLATED", "LOADING") -1319 for_all = self._match_text_seq("FOR", "ALL") -1320 for_insert = self._match_text_seq("FOR", "INSERT") -1321 for_none = self._match_text_seq("FOR", "NONE") -1322 return self.expression( -1323 exp.IsolatedLoadingProperty, -1324 no=no, -1325 concurrent=concurrent, -1326 for_all=for_all, -1327 for_insert=for_insert, -1328 for_none=for_none, -1329 ) +1248 if not user or not host: +1249 return None +1250 +1251 return exp.DefinerProperty(this=f"{user}@{host}") +1252 +1253 def _parse_withjournaltable(self) -> exp.Expression: +1254 self._match(TokenType.TABLE) +1255 self._match(TokenType.EQ) +1256 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) +1257 +1258 def _parse_log(self, no=False) -> exp.Expression: +1259 self._match_text_seq("LOG") +1260 return self.expression(exp.LogProperty, no=no) +1261 +1262 def _parse_journal(self, no=False, dual=False) -> exp.Expression: +1263 before = self._match_text_seq("BEFORE") +1264 self._match_text_seq("JOURNAL") +1265 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) +1266 +1267 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: +1268 self._match_text_seq("NOT") +1269 self._match_text_seq("LOCAL") +1270 self._match_text_seq("AFTER", "JOURNAL") +1271 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) +1272 +1273 def _parse_checksum(self) -> exp.Expression: +1274 self._match_text_seq("CHECKSUM") +1275 self._match(TokenType.EQ) +1276 +1277 on = None +1278 if self._match(TokenType.ON): +1279 on = True +1280 elif self._match_text_seq("OFF"): +1281 on = False +1282 default = self._match(TokenType.DEFAULT) +1283 +1284 return self.expression( +1285 exp.ChecksumProperty, +1286 on=on, +1287 default=default, +1288 ) +1289 +1290 def _parse_freespace(self) -> exp.Expression: +1291 self._match_text_seq("FREESPACE") +1292 self._match(TokenType.EQ) +1293 return self.expression( +1294 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) +1295 ) +1296 +1297 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: +1298 self._match_text_seq("MERGEBLOCKRATIO") +1299 if self._match(TokenType.EQ): +1300 return self.expression( +1301 exp.MergeBlockRatioProperty, +1302 this=self._parse_number(), +1303 percent=self._match(TokenType.PERCENT), +1304 ) +1305 else: +1306 return self.expression( +1307 exp.MergeBlockRatioProperty, +1308 no=no, +1309 default=default, +1310 ) +1311 +1312 def _parse_datablocksize(self, default=None) -> exp.Expression: +1313 if default: +1314 self._match_text_seq("DATABLOCKSIZE") +1315 return self.expression(exp.DataBlocksizeProperty, default=True) +1316 elif self._match_texts(("MIN", "MINIMUM")): +1317 self._match_text_seq("DATABLOCKSIZE") +1318 return self.expression(exp.DataBlocksizeProperty, min=True) +1319 elif self._match_texts(("MAX", "MAXIMUM")): +1320 self._match_text_seq("DATABLOCKSIZE") +1321 return self.expression(exp.DataBlocksizeProperty, min=False) +1322 +1323 self._match_text_seq("DATABLOCKSIZE") +1324 self._match(TokenType.EQ) +1325 size = self._parse_number() +1326 units = None +1327 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): +1328 units = self._prev.text +1329 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1330 -1331 def _parse_locking(self) -> exp.Expression: -1332 if self._match(TokenType.TABLE): -1333 kind = "TABLE" -1334 elif self._match(TokenType.VIEW): -1335 kind = "VIEW" -1336 elif self._match(TokenType.ROW): -1337 kind = "ROW" -1338 elif self._match_text_seq("DATABASE"): -1339 kind = "DATABASE" -1340 else: -1341 kind = None -1342 -1343 if kind in ("DATABASE", "TABLE", "VIEW"): -1344 this = self._parse_table_parts() -1345 else: -1346 this = None -1347 -1348 if self._match(TokenType.FOR): -1349 for_or_in = "FOR" -1350 elif self._match(TokenType.IN): -1351 for_or_in = "IN" -1352 else: -1353 for_or_in = None -1354 -1355 if self._match_text_seq("ACCESS"): -1356 lock_type = "ACCESS" -1357 elif self._match_texts(("EXCL", "EXCLUSIVE")): -1358 lock_type = "EXCLUSIVE" -1359 elif self._match_text_seq("SHARE"): -1360 lock_type = "SHARE" -1361 elif self._match_text_seq("READ"): -1362 lock_type = "READ" -1363 elif self._match_text_seq("WRITE"): -1364 lock_type = "WRITE" -1365 elif self._match_text_seq("CHECKSUM"): -1366 lock_type = "CHECKSUM" -1367 else: -1368 lock_type = None -1369 -1370 override = self._match_text_seq("OVERRIDE") -1371 -1372 return self.expression( -1373 exp.LockingProperty, -1374 this=this, -1375 kind=kind, -1376 for_or_in=for_or_in, -1377 lock_type=lock_type, -1378 override=override, -1379 ) -1380 -1381 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: -1382 if self._match(TokenType.PARTITION_BY): -1383 return self._parse_csv(self._parse_conjunction) -1384 return [] -1385 -1386 def _parse_partitioned_by(self) -> exp.Expression: -1387 self._match(TokenType.EQ) -1388 return self.expression( -1389 exp.PartitionedByProperty, -1390 this=self._parse_schema() or self._parse_bracket(self._parse_field()), -1391 ) -1392 -1393 def _parse_withdata(self, no=False) -> exp.Expression: -1394 if self._match_text_seq("AND", "STATISTICS"): -1395 statistics = True -1396 elif self._match_text_seq("AND", "NO", "STATISTICS"): -1397 statistics = False -1398 else: -1399 statistics = None -1400 -1401 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) -1402 -1403 def _parse_noprimaryindex(self) -> exp.Expression: -1404 self._match_text_seq("PRIMARY", "INDEX") -1405 return exp.NoPrimaryIndexProperty() -1406 -1407 def _parse_oncommit(self) -> exp.Expression: -1408 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") -1409 return exp.OnCommitProperty() -1410 -1411 def _parse_distkey(self) -> exp.Expression: -1412 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) -1413 -1414 def _parse_create_like(self) -> t.Optional[exp.Expression]: -1415 table = self._parse_table(schema=True) -1416 options = [] -1417 while self._match_texts(("INCLUDING", "EXCLUDING")): -1418 this = self._prev.text.upper() -1419 id_var = self._parse_id_var() -1420 -1421 if not id_var: -1422 return None -1423 -1424 options.append( -1425 self.expression( -1426 exp.Property, -1427 this=this, -1428 value=exp.Var(this=id_var.this.upper()), -1429 ) -1430 ) -1431 return self.expression(exp.LikeProperty, this=table, expressions=options) -1432 -1433 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: -1434 return self.expression( -1435 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound -1436 ) -1437 -1438 def _parse_character_set(self, default: bool = False) -> exp.Expression: -1439 self._match(TokenType.EQ) -1440 return self.expression( -1441 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default -1442 ) -1443 -1444 def _parse_returns(self) -> exp.Expression: -1445 value: t.Optional[exp.Expression] -1446 is_table = self._match(TokenType.TABLE) -1447 -1448 if is_table: -1449 if self._match(TokenType.LT): -1450 value = self.expression( -1451 exp.Schema, -1452 this="TABLE", -1453 expressions=self._parse_csv(self._parse_struct_kwargs), -1454 ) -1455 if not self._match(TokenType.GT): -1456 self.raise_error("Expecting >") -1457 else: -1458 value = self._parse_schema(exp.Var(this="TABLE")) -1459 else: -1460 value = self._parse_types() -1461 -1462 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) -1463 -1464 def _parse_temporary(self, global_=False) -> exp.Expression: -1465 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" -1466 return self.expression(exp.TemporaryProperty, global_=global_) -1467 -1468 def _parse_describe(self) -> exp.Expression: -1469 kind = self._match_set(self.CREATABLES) and self._prev.text -1470 this = self._parse_table() -1471 -1472 return self.expression(exp.Describe, this=this, kind=kind) +1331 def _parse_blockcompression(self) -> exp.Expression: +1332 self._match_text_seq("BLOCKCOMPRESSION") +1333 self._match(TokenType.EQ) +1334 always = self._match_text_seq("ALWAYS") +1335 manual = self._match_text_seq("MANUAL") +1336 never = self._match_text_seq("NEVER") +1337 default = self._match_text_seq("DEFAULT") +1338 autotemp = None +1339 if self._match_text_seq("AUTOTEMP"): +1340 autotemp = self._parse_schema() +1341 +1342 return self.expression( +1343 exp.BlockCompressionProperty, +1344 always=always, +1345 manual=manual, +1346 never=never, +1347 default=default, +1348 autotemp=autotemp, +1349 ) +1350 +1351 def _parse_withisolatedloading(self) -> exp.Expression: +1352 no = self._match_text_seq("NO") +1353 concurrent = self._match_text_seq("CONCURRENT") +1354 self._match_text_seq("ISOLATED", "LOADING") +1355 for_all = self._match_text_seq("FOR", "ALL") +1356 for_insert = self._match_text_seq("FOR", "INSERT") +1357 for_none = self._match_text_seq("FOR", "NONE") +1358 return self.expression( +1359 exp.IsolatedLoadingProperty, +1360 no=no, +1361 concurrent=concurrent, +1362 for_all=for_all, +1363 for_insert=for_insert, +1364 for_none=for_none, +1365 ) +1366 +1367 def _parse_locking(self) -> exp.Expression: +1368 if self._match(TokenType.TABLE): +1369 kind = "TABLE" +1370 elif self._match(TokenType.VIEW): +1371 kind = "VIEW" +1372 elif self._match(TokenType.ROW): +1373 kind = "ROW" +1374 elif self._match_text_seq("DATABASE"): +1375 kind = "DATABASE" +1376 else: +1377 kind = None +1378 +1379 if kind in ("DATABASE", "TABLE", "VIEW"): +1380 this = self._parse_table_parts() +1381 else: +1382 this = None +1383 +1384 if self._match(TokenType.FOR): +1385 for_or_in = "FOR" +1386 elif self._match(TokenType.IN): +1387 for_or_in = "IN" +1388 else: +1389 for_or_in = None +1390 +1391 if self._match_text_seq("ACCESS"): +1392 lock_type = "ACCESS" +1393 elif self._match_texts(("EXCL", "EXCLUSIVE")): +1394 lock_type = "EXCLUSIVE" +1395 elif self._match_text_seq("SHARE"): +1396 lock_type = "SHARE" +1397 elif self._match_text_seq("READ"): +1398 lock_type = "READ" +1399 elif self._match_text_seq("WRITE"): +1400 lock_type = "WRITE" +1401 elif self._match_text_seq("CHECKSUM"): +1402 lock_type = "CHECKSUM" +1403 else: +1404 lock_type = None +1405 +1406 override = self._match_text_seq("OVERRIDE") +1407 +1408 return self.expression( +1409 exp.LockingProperty, +1410 this=this, +1411 kind=kind, +1412 for_or_in=for_or_in, +1413 lock_type=lock_type, +1414 override=override, +1415 ) +1416 +1417 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: +1418 if self._match(TokenType.PARTITION_BY): +1419 return self._parse_csv(self._parse_conjunction) +1420 return [] +1421 +1422 def _parse_partitioned_by(self) -> exp.Expression: +1423 self._match(TokenType.EQ) +1424 return self.expression( +1425 exp.PartitionedByProperty, +1426 this=self._parse_schema() or self._parse_bracket(self._parse_field()), +1427 ) +1428 +1429 def _parse_withdata(self, no=False) -> exp.Expression: +1430 if self._match_text_seq("AND", "STATISTICS"): +1431 statistics = True +1432 elif self._match_text_seq("AND", "NO", "STATISTICS"): +1433 statistics = False +1434 else: +1435 statistics = None +1436 +1437 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) +1438 +1439 def _parse_noprimaryindex(self) -> exp.Expression: +1440 self._match_text_seq("PRIMARY", "INDEX") +1441 return exp.NoPrimaryIndexProperty() +1442 +1443 def _parse_oncommit(self) -> exp.Expression: +1444 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") +1445 return exp.OnCommitProperty() +1446 +1447 def _parse_distkey(self) -> exp.Expression: +1448 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) +1449 +1450 def _parse_create_like(self) -> t.Optional[exp.Expression]: +1451 table = self._parse_table(schema=True) +1452 options = [] +1453 while self._match_texts(("INCLUDING", "EXCLUDING")): +1454 this = self._prev.text.upper() +1455 id_var = self._parse_id_var() +1456 +1457 if not id_var: +1458 return None +1459 +1460 options.append( +1461 self.expression( +1462 exp.Property, +1463 this=this, +1464 value=exp.Var(this=id_var.this.upper()), +1465 ) +1466 ) +1467 return self.expression(exp.LikeProperty, this=table, expressions=options) +1468 +1469 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: +1470 return self.expression( +1471 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound +1472 ) 1473 -1474 def _parse_insert(self) -> exp.Expression: -1475 overwrite = self._match(TokenType.OVERWRITE) -1476 local = self._match(TokenType.LOCAL) -1477 -1478 this: t.Optional[exp.Expression] +1474 def _parse_character_set(self, default: bool = False) -> exp.Expression: +1475 self._match(TokenType.EQ) +1476 return self.expression( +1477 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default +1478 ) 1479 -1480 alternative = None -1481 if self._match_text_seq("DIRECTORY"): -1482 this = self.expression( -1483 exp.Directory, -1484 this=self._parse_var_or_string(), -1485 local=local, -1486 row_format=self._parse_row_format(match_row=True), -1487 ) -1488 else: -1489 if self._match(TokenType.OR): -1490 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text -1491 -1492 self._match(TokenType.INTO) -1493 self._match(TokenType.TABLE) -1494 this = self._parse_table(schema=True) -1495 -1496 return self.expression( -1497 exp.Insert, -1498 this=this, -1499 exists=self._parse_exists(), -1500 partition=self._parse_partition(), -1501 expression=self._parse_ddl_select(), -1502 overwrite=overwrite, -1503 alternative=alternative, -1504 ) -1505 -1506 def _parse_row(self) -> t.Optional[exp.Expression]: -1507 if not self._match(TokenType.FORMAT): -1508 return None -1509 return self._parse_row_format() -1510 -1511 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: -1512 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): -1513 return None -1514 -1515 if self._match_text_seq("SERDE"): -1516 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) -1517 -1518 self._match_text_seq("DELIMITED") -1519 -1520 kwargs = {} -1521 -1522 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): -1523 kwargs["fields"] = self._parse_string() -1524 if self._match_text_seq("ESCAPED", "BY"): -1525 kwargs["escaped"] = self._parse_string() -1526 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): -1527 kwargs["collection_items"] = self._parse_string() -1528 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): -1529 kwargs["map_keys"] = self._parse_string() -1530 if self._match_text_seq("LINES", "TERMINATED", "BY"): -1531 kwargs["lines"] = self._parse_string() -1532 if self._match_text_seq("NULL", "DEFINED", "AS"): -1533 kwargs["null"] = self._parse_string() -1534 -1535 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore -1536 -1537 def _parse_load_data(self) -> exp.Expression: -1538 local = self._match(TokenType.LOCAL) -1539 self._match_text_seq("INPATH") -1540 inpath = self._parse_string() -1541 overwrite = self._match(TokenType.OVERWRITE) -1542 self._match_pair(TokenType.INTO, TokenType.TABLE) -1543 -1544 return self.expression( -1545 exp.LoadData, -1546 this=self._parse_table(schema=True), -1547 local=local, -1548 overwrite=overwrite, -1549 inpath=inpath, -1550 partition=self._parse_partition(), -1551 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), -1552 serde=self._match_text_seq("SERDE") and self._parse_string(), -1553 ) -1554 -1555 def _parse_delete(self) -> exp.Expression: -1556 self._match(TokenType.FROM) +1480 def _parse_returns(self) -> exp.Expression: +1481 value: t.Optional[exp.Expression] +1482 is_table = self._match(TokenType.TABLE) +1483 +1484 if is_table: +1485 if self._match(TokenType.LT): +1486 value = self.expression( +1487 exp.Schema, +1488 this="TABLE", +1489 expressions=self._parse_csv(self._parse_struct_kwargs), +1490 ) +1491 if not self._match(TokenType.GT): +1492 self.raise_error("Expecting >") +1493 else: +1494 value = self._parse_schema(exp.Var(this="TABLE")) +1495 else: +1496 value = self._parse_types() +1497 +1498 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) +1499 +1500 def _parse_temporary(self, global_=False) -> exp.Expression: +1501 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" +1502 return self.expression(exp.TemporaryProperty, global_=global_) +1503 +1504 def _parse_describe(self) -> exp.Expression: +1505 kind = self._match_set(self.CREATABLES) and self._prev.text +1506 this = self._parse_table() +1507 +1508 return self.expression(exp.Describe, this=this, kind=kind) +1509 +1510 def _parse_insert(self) -> exp.Expression: +1511 overwrite = self._match(TokenType.OVERWRITE) +1512 local = self._match(TokenType.LOCAL) +1513 +1514 this: t.Optional[exp.Expression] +1515 +1516 alternative = None +1517 if self._match_text_seq("DIRECTORY"): +1518 this = self.expression( +1519 exp.Directory, +1520 this=self._parse_var_or_string(), +1521 local=local, +1522 row_format=self._parse_row_format(match_row=True), +1523 ) +1524 else: +1525 if self._match(TokenType.OR): +1526 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text +1527 +1528 self._match(TokenType.INTO) +1529 self._match(TokenType.TABLE) +1530 this = self._parse_table(schema=True) +1531 +1532 return self.expression( +1533 exp.Insert, +1534 this=this, +1535 exists=self._parse_exists(), +1536 partition=self._parse_partition(), +1537 expression=self._parse_ddl_select(), +1538 overwrite=overwrite, +1539 alternative=alternative, +1540 ) +1541 +1542 def _parse_row(self) -> t.Optional[exp.Expression]: +1543 if not self._match(TokenType.FORMAT): +1544 return None +1545 return self._parse_row_format() +1546 +1547 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: +1548 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): +1549 return None +1550 +1551 if self._match_text_seq("SERDE"): +1552 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) +1553 +1554 self._match_text_seq("DELIMITED") +1555 +1556 kwargs = {} 1557 -1558 return self.expression( -1559 exp.Delete, -1560 this=self._parse_table(schema=True), -1561 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), -1562 where=self._parse_where(), -1563 ) -1564 -1565 def _parse_update(self) -> exp.Expression: -1566 return self.expression( -1567 exp.Update, -1568 **{ # type: ignore -1569 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), -1570 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), -1571 "from": self._parse_from(), -1572 "where": self._parse_where(), -1573 }, -1574 ) -1575 -1576 def _parse_uncache(self) -> exp.Expression: -1577 if not self._match(TokenType.TABLE): -1578 self.raise_error("Expecting TABLE after UNCACHE") +1558 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): +1559 kwargs["fields"] = self._parse_string() +1560 if self._match_text_seq("ESCAPED", "BY"): +1561 kwargs["escaped"] = self._parse_string() +1562 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): +1563 kwargs["collection_items"] = self._parse_string() +1564 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): +1565 kwargs["map_keys"] = self._parse_string() +1566 if self._match_text_seq("LINES", "TERMINATED", "BY"): +1567 kwargs["lines"] = self._parse_string() +1568 if self._match_text_seq("NULL", "DEFINED", "AS"): +1569 kwargs["null"] = self._parse_string() +1570 +1571 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore +1572 +1573 def _parse_load_data(self) -> exp.Expression: +1574 local = self._match(TokenType.LOCAL) +1575 self._match_text_seq("INPATH") +1576 inpath = self._parse_string() +1577 overwrite = self._match(TokenType.OVERWRITE) +1578 self._match_pair(TokenType.INTO, TokenType.TABLE) 1579 1580 return self.expression( -1581 exp.Uncache, -1582 exists=self._parse_exists(), -1583 this=self._parse_table(schema=True), -1584 ) -1585 -1586 def _parse_cache(self) -> exp.Expression: -1587 lazy = self._match(TokenType.LAZY) -1588 self._match(TokenType.TABLE) -1589 table = self._parse_table(schema=True) -1590 options = [] -1591 -1592 if self._match(TokenType.OPTIONS): -1593 self._match_l_paren() -1594 k = self._parse_string() -1595 self._match(TokenType.EQ) -1596 v = self._parse_string() -1597 options = [k, v] -1598 self._match_r_paren() -1599 -1600 self._match(TokenType.ALIAS) -1601 return self.expression( -1602 exp.Cache, -1603 this=table, -1604 lazy=lazy, -1605 options=options, -1606 expression=self._parse_select(nested=True), -1607 ) -1608 -1609 def _parse_partition(self) -> t.Optional[exp.Expression]: -1610 if not self._match(TokenType.PARTITION): -1611 return None -1612 -1613 return self.expression( -1614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) -1615 ) -1616 -1617 def _parse_value(self) -> exp.Expression: -1618 if self._match(TokenType.L_PAREN): -1619 expressions = self._parse_csv(self._parse_conjunction) -1620 self._match_r_paren() -1621 return self.expression(exp.Tuple, expressions=expressions) -1622 -1623 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. -1624 # Source: https://prestodb.io/docs/current/sql/values.html -1625 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) -1626 -1627 def _parse_select( -1628 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True -1629 ) -> t.Optional[exp.Expression]: -1630 cte = self._parse_with() -1631 if cte: -1632 this = self._parse_statement() -1633 -1634 if not this: -1635 self.raise_error("Failed to parse any statement following CTE") -1636 return cte -1637 -1638 if "with" in this.arg_types: -1639 this.set("with", cte) -1640 else: -1641 self.raise_error(f"{this.key} does not support CTE") -1642 this = cte -1643 elif self._match(TokenType.SELECT): -1644 comments = self._prev_comments -1645 -1646 hint = self._parse_hint() -1647 all_ = self._match(TokenType.ALL) -1648 distinct = self._match(TokenType.DISTINCT) -1649 -1650 if distinct: -1651 distinct = self.expression( -1652 exp.Distinct, -1653 on=self._parse_value() if self._match(TokenType.ON) else None, -1654 ) -1655 -1656 if all_ and distinct: -1657 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") +1581 exp.LoadData, +1582 this=self._parse_table(schema=True), +1583 local=local, +1584 overwrite=overwrite, +1585 inpath=inpath, +1586 partition=self._parse_partition(), +1587 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), +1588 serde=self._match_text_seq("SERDE") and self._parse_string(), +1589 ) +1590 +1591 def _parse_delete(self) -> exp.Expression: +1592 self._match(TokenType.FROM) +1593 +1594 return self.expression( +1595 exp.Delete, +1596 this=self._parse_table(schema=True), +1597 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), +1598 where=self._parse_where(), +1599 ) +1600 +1601 def _parse_update(self) -> exp.Expression: +1602 return self.expression( +1603 exp.Update, +1604 **{ # type: ignore +1605 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), +1606 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), +1607 "from": self._parse_from(), +1608 "where": self._parse_where(), +1609 }, +1610 ) +1611 +1612 def _parse_uncache(self) -> exp.Expression: +1613 if not self._match(TokenType.TABLE): +1614 self.raise_error("Expecting TABLE after UNCACHE") +1615 +1616 return self.expression( +1617 exp.Uncache, +1618 exists=self._parse_exists(), +1619 this=self._parse_table(schema=True), +1620 ) +1621 +1622 def _parse_cache(self) -> exp.Expression: +1623 lazy = self._match(TokenType.LAZY) +1624 self._match(TokenType.TABLE) +1625 table = self._parse_table(schema=True) +1626 options = [] +1627 +1628 if self._match(TokenType.OPTIONS): +1629 self._match_l_paren() +1630 k = self._parse_string() +1631 self._match(TokenType.EQ) +1632 v = self._parse_string() +1633 options = [k, v] +1634 self._match_r_paren() +1635 +1636 self._match(TokenType.ALIAS) +1637 return self.expression( +1638 exp.Cache, +1639 this=table, +1640 lazy=lazy, +1641 options=options, +1642 expression=self._parse_select(nested=True), +1643 ) +1644 +1645 def _parse_partition(self) -> t.Optional[exp.Expression]: +1646 if not self._match(TokenType.PARTITION): +1647 return None +1648 +1649 return self.expression( +1650 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) +1651 ) +1652 +1653 def _parse_value(self) -> exp.Expression: +1654 if self._match(TokenType.L_PAREN): +1655 expressions = self._parse_csv(self._parse_conjunction) +1656 self._match_r_paren() +1657 return self.expression(exp.Tuple, expressions=expressions) 1658 -1659 limit = self._parse_limit(top=True) -1660 expressions = self._parse_csv(self._parse_expression) -1661 -1662 this = self.expression( -1663 exp.Select, -1664 hint=hint, -1665 distinct=distinct, -1666 expressions=expressions, -1667 limit=limit, -1668 ) -1669 this.comments = comments -1670 -1671 into = self._parse_into() -1672 if into: -1673 this.set("into", into) -1674 -1675 from_ = self._parse_from() -1676 if from_: -1677 this.set("from", from_) -1678 -1679 self._parse_query_modifiers(this) -1680 elif (table or nested) and self._match(TokenType.L_PAREN): -1681 this = self._parse_table() if table else self._parse_select(nested=True) -1682 self._parse_query_modifiers(this) -1683 this = self._parse_set_operations(this) -1684 self._match_r_paren() +1659 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. +1660 # Source: https://prestodb.io/docs/current/sql/values.html +1661 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) +1662 +1663 def _parse_select( +1664 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True +1665 ) -> t.Optional[exp.Expression]: +1666 cte = self._parse_with() +1667 if cte: +1668 this = self._parse_statement() +1669 +1670 if not this: +1671 self.raise_error("Failed to parse any statement following CTE") +1672 return cte +1673 +1674 if "with" in this.arg_types: +1675 this.set("with", cte) +1676 else: +1677 self.raise_error(f"{this.key} does not support CTE") +1678 this = cte +1679 elif self._match(TokenType.SELECT): +1680 comments = self._prev_comments +1681 +1682 hint = self._parse_hint() +1683 all_ = self._match(TokenType.ALL) +1684 distinct = self._match(TokenType.DISTINCT) 1685 -1686 # early return so that subquery unions aren't parsed again -1687 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 -1688 # Union ALL should be a property of the top select node, not the subquery -1689 return self._parse_subquery(this, parse_alias=parse_subquery_alias) -1690 elif self._match(TokenType.VALUES): -1691 this = self.expression( -1692 exp.Values, -1693 expressions=self._parse_csv(self._parse_value), -1694 alias=self._parse_table_alias(), -1695 ) -1696 else: -1697 this = None -1698 -1699 return self._parse_set_operations(this) -1700 -1701 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: -1702 if not skip_with_token and not self._match(TokenType.WITH): -1703 return None -1704 -1705 recursive = self._match(TokenType.RECURSIVE) +1686 if distinct: +1687 distinct = self.expression( +1688 exp.Distinct, +1689 on=self._parse_value() if self._match(TokenType.ON) else None, +1690 ) +1691 +1692 if all_ and distinct: +1693 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") +1694 +1695 limit = self._parse_limit(top=True) +1696 expressions = self._parse_csv(self._parse_expression) +1697 +1698 this = self.expression( +1699 exp.Select, +1700 hint=hint, +1701 distinct=distinct, +1702 expressions=expressions, +1703 limit=limit, +1704 ) +1705 this.comments = comments 1706 -1707 expressions = [] -1708 while True: -1709 expressions.append(self._parse_cte()) +1707 into = self._parse_into() +1708 if into: +1709 this.set("into", into) 1710 -1711 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): -1712 break -1713 else: -1714 self._match(TokenType.WITH) -1715 -1716 return self.expression(exp.With, expressions=expressions, recursive=recursive) -1717 -1718 def _parse_cte(self) -> exp.Expression: -1719 alias = self._parse_table_alias() -1720 if not alias or not alias.this: -1721 self.raise_error("Expected CTE to have alias") -1722 -1723 self._match(TokenType.ALIAS) -1724 -1725 return self.expression( -1726 exp.CTE, -1727 this=self._parse_wrapped(self._parse_statement), -1728 alias=alias, -1729 ) -1730 -1731 def _parse_table_alias( -1732 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None -1733 ) -> t.Optional[exp.Expression]: -1734 any_token = self._match(TokenType.ALIAS) -1735 alias = self._parse_id_var( -1736 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS -1737 ) -1738 index = self._index -1739 -1740 if self._match(TokenType.L_PAREN): -1741 columns = self._parse_csv(self._parse_function_parameter) -1742 self._match_r_paren() if columns else self._retreat(index) -1743 else: -1744 columns = None -1745 -1746 if not alias and not columns: -1747 return None -1748 -1749 return self.expression(exp.TableAlias, this=alias, columns=columns) -1750 -1751 def _parse_subquery( -1752 self, this: t.Optional[exp.Expression], parse_alias: bool = True -1753 ) -> exp.Expression: -1754 return self.expression( -1755 exp.Subquery, -1756 this=this, -1757 pivots=self._parse_pivots(), -1758 alias=self._parse_table_alias() if parse_alias else None, -1759 ) +1711 from_ = self._parse_from() +1712 if from_: +1713 this.set("from", from_) +1714 +1715 self._parse_query_modifiers(this) +1716 elif (table or nested) and self._match(TokenType.L_PAREN): +1717 this = self._parse_table() if table else self._parse_select(nested=True) +1718 self._parse_query_modifiers(this) +1719 this = self._parse_set_operations(this) +1720 self._match_r_paren() +1721 +1722 # early return so that subquery unions aren't parsed again +1723 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 +1724 # Union ALL should be a property of the top select node, not the subquery +1725 return self._parse_subquery(this, parse_alias=parse_subquery_alias) +1726 elif self._match(TokenType.VALUES): +1727 this = self.expression( +1728 exp.Values, +1729 expressions=self._parse_csv(self._parse_value), +1730 alias=self._parse_table_alias(), +1731 ) +1732 else: +1733 this = None +1734 +1735 return self._parse_set_operations(this) +1736 +1737 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: +1738 if not skip_with_token and not self._match(TokenType.WITH): +1739 return None +1740 +1741 recursive = self._match(TokenType.RECURSIVE) +1742 +1743 expressions = [] +1744 while True: +1745 expressions.append(self._parse_cte()) +1746 +1747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): +1748 break +1749 else: +1750 self._match(TokenType.WITH) +1751 +1752 return self.expression(exp.With, expressions=expressions, recursive=recursive) +1753 +1754 def _parse_cte(self) -> exp.Expression: +1755 alias = self._parse_table_alias() +1756 if not alias or not alias.this: +1757 self.raise_error("Expected CTE to have alias") +1758 +1759 self._match(TokenType.ALIAS) 1760 -1761 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: -1762 if not isinstance(this, self.MODIFIABLES): -1763 return -1764 -1765 table = isinstance(this, exp.Table) +1761 return self.expression( +1762 exp.CTE, +1763 this=self._parse_wrapped(self._parse_statement), +1764 alias=alias, +1765 ) 1766 -1767 while True: -1768 lateral = self._parse_lateral() -1769 join = self._parse_join() -1770 comma = None if table else self._match(TokenType.COMMA) -1771 if lateral: -1772 this.append("laterals", lateral) -1773 if join: -1774 this.append("joins", join) -1775 if comma: -1776 this.args["from"].append("expressions", self._parse_table()) -1777 if not (lateral or join or comma): -1778 break -1779 -1780 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): -1781 expression = parser(self) -1782 -1783 if expression: -1784 this.set(key, expression) -1785 -1786 def _parse_hint(self) -> t.Optional[exp.Expression]: -1787 if self._match(TokenType.HINT): -1788 hints = self._parse_csv(self._parse_function) -1789 if not self._match_pair(TokenType.STAR, TokenType.SLASH): -1790 self.raise_error("Expected */ after HINT") -1791 return self.expression(exp.Hint, expressions=hints) -1792 -1793 return None -1794 -1795 def _parse_into(self) -> t.Optional[exp.Expression]: -1796 if not self._match(TokenType.INTO): -1797 return None -1798 -1799 temp = self._match(TokenType.TEMPORARY) -1800 unlogged = self._match(TokenType.UNLOGGED) -1801 self._match(TokenType.TABLE) +1767 def _parse_table_alias( +1768 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None +1769 ) -> t.Optional[exp.Expression]: +1770 any_token = self._match(TokenType.ALIAS) +1771 alias = self._parse_id_var( +1772 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS +1773 ) +1774 index = self._index +1775 +1776 if self._match(TokenType.L_PAREN): +1777 columns = self._parse_csv(self._parse_function_parameter) +1778 self._match_r_paren() if columns else self._retreat(index) +1779 else: +1780 columns = None +1781 +1782 if not alias and not columns: +1783 return None +1784 +1785 return self.expression(exp.TableAlias, this=alias, columns=columns) +1786 +1787 def _parse_subquery( +1788 self, this: t.Optional[exp.Expression], parse_alias: bool = True +1789 ) -> exp.Expression: +1790 return self.expression( +1791 exp.Subquery, +1792 this=this, +1793 pivots=self._parse_pivots(), +1794 alias=self._parse_table_alias() if parse_alias else None, +1795 ) +1796 +1797 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: +1798 if not isinstance(this, self.MODIFIABLES): +1799 return +1800 +1801 table = isinstance(this, exp.Table) 1802 -1803 return self.expression( -1804 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged -1805 ) -1806 -1807 def _parse_from(self) -> t.Optional[exp.Expression]: -1808 if not self._match(TokenType.FROM): -1809 return None -1810 -1811 return self.expression( -1812 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) -1813 ) -1814 -1815 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: -1816 if not self._match(TokenType.MATCH_RECOGNIZE): -1817 return None -1818 self._match_l_paren() -1819 -1820 partition = self._parse_partition_by() -1821 order = self._parse_order() -1822 measures = ( -1823 self._parse_alias(self._parse_conjunction()) -1824 if self._match_text_seq("MEASURES") -1825 else None -1826 ) -1827 -1828 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): -1829 rows = exp.Var(this="ONE ROW PER MATCH") -1830 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): -1831 text = "ALL ROWS PER MATCH" -1832 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): -1833 text += f" SHOW EMPTY MATCHES" -1834 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): -1835 text += f" OMIT EMPTY MATCHES" -1836 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): -1837 text += f" WITH UNMATCHED ROWS" -1838 rows = exp.Var(this=text) -1839 else: -1840 rows = None -1841 -1842 if self._match_text_seq("AFTER", "MATCH", "SKIP"): -1843 text = "AFTER MATCH SKIP" -1844 if self._match_text_seq("PAST", "LAST", "ROW"): -1845 text += f" PAST LAST ROW" -1846 elif self._match_text_seq("TO", "NEXT", "ROW"): -1847 text += f" TO NEXT ROW" -1848 elif self._match_text_seq("TO", "FIRST"): -1849 text += f" TO FIRST {self._advance_any().text}" # type: ignore -1850 elif self._match_text_seq("TO", "LAST"): -1851 text += f" TO LAST {self._advance_any().text}" # type: ignore -1852 after = exp.Var(this=text) -1853 else: -1854 after = None +1803 while True: +1804 lateral = self._parse_lateral() +1805 join = self._parse_join() +1806 comma = None if table else self._match(TokenType.COMMA) +1807 if lateral: +1808 this.append("laterals", lateral) +1809 if join: +1810 this.append("joins", join) +1811 if comma: +1812 this.args["from"].append("expressions", self._parse_table()) +1813 if not (lateral or join or comma): +1814 break +1815 +1816 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): +1817 expression = parser(self) +1818 +1819 if expression: +1820 this.set(key, expression) +1821 +1822 def _parse_hint(self) -> t.Optional[exp.Expression]: +1823 if self._match(TokenType.HINT): +1824 hints = self._parse_csv(self._parse_function) +1825 if not self._match_pair(TokenType.STAR, TokenType.SLASH): +1826 self.raise_error("Expected */ after HINT") +1827 return self.expression(exp.Hint, expressions=hints) +1828 +1829 return None +1830 +1831 def _parse_into(self) -> t.Optional[exp.Expression]: +1832 if not self._match(TokenType.INTO): +1833 return None +1834 +1835 temp = self._match(TokenType.TEMPORARY) +1836 unlogged = self._match(TokenType.UNLOGGED) +1837 self._match(TokenType.TABLE) +1838 +1839 return self.expression( +1840 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged +1841 ) +1842 +1843 def _parse_from(self) -> t.Optional[exp.Expression]: +1844 if not self._match(TokenType.FROM): +1845 return None +1846 +1847 return self.expression( +1848 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) +1849 ) +1850 +1851 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: +1852 if not self._match(TokenType.MATCH_RECOGNIZE): +1853 return None +1854 self._match_l_paren() 1855 -1856 if self._match_text_seq("PATTERN"): -1857 self._match_l_paren() -1858 -1859 if not self._curr: -1860 self.raise_error("Expecting )", self._curr) -1861 -1862 paren = 1 -1863 start = self._curr -1864 -1865 while self._curr and paren > 0: -1866 if self._curr.token_type == TokenType.L_PAREN: -1867 paren += 1 -1868 if self._curr.token_type == TokenType.R_PAREN: -1869 paren -= 1 -1870 end = self._prev -1871 self._advance() -1872 if paren > 0: -1873 self.raise_error("Expecting )", self._curr) -1874 pattern = exp.Var(this=self._find_sql(start, end)) +1856 partition = self._parse_partition_by() +1857 order = self._parse_order() +1858 measures = ( +1859 self._parse_alias(self._parse_conjunction()) +1860 if self._match_text_seq("MEASURES") +1861 else None +1862 ) +1863 +1864 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): +1865 rows = exp.Var(this="ONE ROW PER MATCH") +1866 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): +1867 text = "ALL ROWS PER MATCH" +1868 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): +1869 text += f" SHOW EMPTY MATCHES" +1870 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): +1871 text += f" OMIT EMPTY MATCHES" +1872 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): +1873 text += f" WITH UNMATCHED ROWS" +1874 rows = exp.Var(this=text) 1875 else: -1876 pattern = None +1876 rows = None 1877 -1878 define = ( -1879 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None -1880 ) -1881 self._match_r_paren() -1882 -1883 return self.expression( -1884 exp.MatchRecognize, -1885 partition_by=partition, -1886 order=order, -1887 measures=measures, -1888 rows=rows, -1889 after=after, -1890 pattern=pattern, -1891 define=define, -1892 ) -1893 -1894 def _parse_lateral(self) -> t.Optional[exp.Expression]: -1895 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) -1896 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) +1878 if self._match_text_seq("AFTER", "MATCH", "SKIP"): +1879 text = "AFTER MATCH SKIP" +1880 if self._match_text_seq("PAST", "LAST", "ROW"): +1881 text += f" PAST LAST ROW" +1882 elif self._match_text_seq("TO", "NEXT", "ROW"): +1883 text += f" TO NEXT ROW" +1884 elif self._match_text_seq("TO", "FIRST"): +1885 text += f" TO FIRST {self._advance_any().text}" # type: ignore +1886 elif self._match_text_seq("TO", "LAST"): +1887 text += f" TO LAST {self._advance_any().text}" # type: ignore +1888 after = exp.Var(this=text) +1889 else: +1890 after = None +1891 +1892 if self._match_text_seq("PATTERN"): +1893 self._match_l_paren() +1894 +1895 if not self._curr: +1896 self.raise_error("Expecting )", self._curr) 1897 -1898 if outer_apply or cross_apply: -1899 this = self._parse_select(table=True) -1900 view = None -1901 outer = not cross_apply -1902 elif self._match(TokenType.LATERAL): -1903 this = self._parse_select(table=True) -1904 view = self._match(TokenType.VIEW) -1905 outer = self._match(TokenType.OUTER) -1906 else: -1907 return None -1908 -1909 if not this: -1910 this = self._parse_function() or self._parse_id_var(any_token=False) -1911 while self._match(TokenType.DOT): -1912 this = exp.Dot( -1913 this=this, -1914 expression=self._parse_function() or self._parse_id_var(any_token=False), -1915 ) -1916 -1917 table_alias: t.Optional[exp.Expression] +1898 paren = 1 +1899 start = self._curr +1900 +1901 while self._curr and paren > 0: +1902 if self._curr.token_type == TokenType.L_PAREN: +1903 paren += 1 +1904 if self._curr.token_type == TokenType.R_PAREN: +1905 paren -= 1 +1906 end = self._prev +1907 self._advance() +1908 if paren > 0: +1909 self.raise_error("Expecting )", self._curr) +1910 pattern = exp.Var(this=self._find_sql(start, end)) +1911 else: +1912 pattern = None +1913 +1914 define = ( +1915 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None +1916 ) +1917 self._match_r_paren() 1918 -1919 if view: -1920 table = self._parse_id_var(any_token=False) -1921 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] -1922 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) -1923 else: -1924 table_alias = self._parse_table_alias() -1925 -1926 expression = self.expression( -1927 exp.Lateral, -1928 this=this, -1929 view=view, -1930 outer=outer, -1931 alias=table_alias, -1932 ) +1919 return self.expression( +1920 exp.MatchRecognize, +1921 partition_by=partition, +1922 order=order, +1923 measures=measures, +1924 rows=rows, +1925 after=after, +1926 pattern=pattern, +1927 define=define, +1928 ) +1929 +1930 def _parse_lateral(self) -> t.Optional[exp.Expression]: +1931 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) +1932 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1933 1934 if outer_apply or cross_apply: -1935 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") -1936 -1937 return expression -1938 -1939 def _parse_join_side_and_kind( -1940 self, -1941 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: -1942 return ( -1943 self._match(TokenType.NATURAL) and self._prev, -1944 self._match_set(self.JOIN_SIDES) and self._prev, -1945 self._match_set(self.JOIN_KINDS) and self._prev, -1946 ) -1947 -1948 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: -1949 natural, side, kind = self._parse_join_side_and_kind() -1950 -1951 if not skip_join_token and not self._match(TokenType.JOIN): -1952 return None -1953 -1954 kwargs: t.Dict[ -1955 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] -1956 ] = {"this": self._parse_table()} -1957 -1958 if natural: -1959 kwargs["natural"] = True -1960 if side: -1961 kwargs["side"] = side.text -1962 if kind: -1963 kwargs["kind"] = kind.text -1964 -1965 if self._match(TokenType.ON): -1966 kwargs["on"] = self._parse_conjunction() -1967 elif self._match(TokenType.USING): -1968 kwargs["using"] = self._parse_wrapped_id_vars() +1935 this = self._parse_select(table=True) +1936 view = None +1937 outer = not cross_apply +1938 elif self._match(TokenType.LATERAL): +1939 this = self._parse_select(table=True) +1940 view = self._match(TokenType.VIEW) +1941 outer = self._match(TokenType.OUTER) +1942 else: +1943 return None +1944 +1945 if not this: +1946 this = self._parse_function() or self._parse_id_var(any_token=False) +1947 while self._match(TokenType.DOT): +1948 this = exp.Dot( +1949 this=this, +1950 expression=self._parse_function() or self._parse_id_var(any_token=False), +1951 ) +1952 +1953 table_alias: t.Optional[exp.Expression] +1954 +1955 if view: +1956 table = self._parse_id_var(any_token=False) +1957 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] +1958 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) +1959 else: +1960 table_alias = self._parse_table_alias() +1961 +1962 expression = self.expression( +1963 exp.Lateral, +1964 this=this, +1965 view=view, +1966 outer=outer, +1967 alias=table_alias, +1968 ) 1969 -1970 return self.expression(exp.Join, **kwargs) # type: ignore -1971 -1972 def _parse_index(self) -> exp.Expression: -1973 index = self._parse_id_var() -1974 self._match(TokenType.ON) -1975 self._match(TokenType.TABLE) # hive -1976 -1977 return self.expression( -1978 exp.Index, -1979 this=index, -1980 table=self.expression(exp.Table, this=self._parse_id_var()), -1981 columns=self._parse_expression(), +1970 if outer_apply or cross_apply: +1971 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") +1972 +1973 return expression +1974 +1975 def _parse_join_side_and_kind( +1976 self, +1977 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: +1978 return ( +1979 self._match(TokenType.NATURAL) and self._prev, +1980 self._match_set(self.JOIN_SIDES) and self._prev, +1981 self._match_set(self.JOIN_KINDS) and self._prev, 1982 ) 1983 -1984 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: -1985 unique = self._match(TokenType.UNIQUE) -1986 primary = self._match_text_seq("PRIMARY") -1987 amp = self._match_text_seq("AMP") -1988 if not self._match(TokenType.INDEX): -1989 return None -1990 index = self._parse_id_var() -1991 columns = None -1992 if self._match(TokenType.L_PAREN, advance=False): -1993 columns = self._parse_wrapped_csv(self._parse_column) -1994 return self.expression( -1995 exp.Index, -1996 this=index, -1997 columns=columns, -1998 unique=unique, -1999 primary=primary, -2000 amp=amp, -2001 ) -2002 -2003 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: -2004 catalog = None -2005 db = None -2006 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) +1984 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: +1985 natural, side, kind = self._parse_join_side_and_kind() +1986 +1987 if not skip_join_token and not self._match(TokenType.JOIN): +1988 return None +1989 +1990 kwargs: t.Dict[ +1991 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] +1992 ] = {"this": self._parse_table()} +1993 +1994 if natural: +1995 kwargs["natural"] = True +1996 if side: +1997 kwargs["side"] = side.text +1998 if kind: +1999 kwargs["kind"] = kind.text +2000 +2001 if self._match(TokenType.ON): +2002 kwargs["on"] = self._parse_conjunction() +2003 elif self._match(TokenType.USING): +2004 kwargs["using"] = self._parse_wrapped_id_vars() +2005 +2006 return self.expression(exp.Join, **kwargs) # type: ignore 2007 -2008 while self._match(TokenType.DOT): -2009 if catalog: -2010 # This allows nesting the table in arbitrarily many dot expressions if needed -2011 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) -2012 else: -2013 catalog = db -2014 db = table -2015 table = self._parse_id_var() -2016 -2017 if not table: -2018 self.raise_error(f"Expected table name but got {self._curr}") +2008 def _parse_index(self) -> exp.Expression: +2009 index = self._parse_id_var() +2010 self._match(TokenType.ON) +2011 self._match(TokenType.TABLE) # hive +2012 +2013 return self.expression( +2014 exp.Index, +2015 this=index, +2016 table=self.expression(exp.Table, this=self._parse_id_var()), +2017 columns=self._parse_expression(), +2018 ) 2019 -2020 return self.expression( -2021 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() -2022 ) -2023 -2024 def _parse_table( -2025 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None -2026 ) -> t.Optional[exp.Expression]: -2027 lateral = self._parse_lateral() -2028 -2029 if lateral: -2030 return lateral -2031 -2032 unnest = self._parse_unnest() -2033 -2034 if unnest: -2035 return unnest -2036 -2037 values = self._parse_derived_table_values() +2020 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: +2021 unique = self._match(TokenType.UNIQUE) +2022 primary = self._match_text_seq("PRIMARY") +2023 amp = self._match_text_seq("AMP") +2024 if not self._match(TokenType.INDEX): +2025 return None +2026 index = self._parse_id_var() +2027 columns = None +2028 if self._match(TokenType.L_PAREN, advance=False): +2029 columns = self._parse_wrapped_csv(self._parse_column) +2030 return self.expression( +2031 exp.Index, +2032 this=index, +2033 columns=columns, +2034 unique=unique, +2035 primary=primary, +2036 amp=amp, +2037 ) 2038 -2039 if values: -2040 return values -2041 -2042 subquery = self._parse_select(table=True) +2039 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: +2040 catalog = None +2041 db = None +2042 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 2043 -2044 if subquery: -2045 return subquery -2046 -2047 this = self._parse_table_parts(schema=schema) -2048 -2049 if schema: -2050 return self._parse_schema(this=this) -2051 -2052 if self.alias_post_tablesample: -2053 table_sample = self._parse_table_sample() -2054 -2055 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) -2056 -2057 if alias: -2058 this.set("alias", alias) +2044 while self._match(TokenType.DOT): +2045 if catalog: +2046 # This allows nesting the table in arbitrarily many dot expressions if needed +2047 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) +2048 else: +2049 catalog = db +2050 db = table +2051 table = self._parse_id_var() +2052 +2053 if not table: +2054 self.raise_error(f"Expected table name but got {self._curr}") +2055 +2056 return self.expression( +2057 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() +2058 ) 2059 -2060 if not this.args.get("pivots"): -2061 this.set("pivots", self._parse_pivots()) -2062 -2063 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): -2064 this.set( -2065 "hints", -2066 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), -2067 ) -2068 self._match_r_paren() +2060 def _parse_table( +2061 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None +2062 ) -> t.Optional[exp.Expression]: +2063 lateral = self._parse_lateral() +2064 +2065 if lateral: +2066 return lateral +2067 +2068 unnest = self._parse_unnest() 2069 -2070 if not self.alias_post_tablesample: -2071 table_sample = self._parse_table_sample() +2070 if unnest: +2071 return unnest 2072 -2073 if table_sample: -2074 table_sample.set("this", this) -2075 this = table_sample -2076 -2077 return this -2078 -2079 def _parse_unnest(self) -> t.Optional[exp.Expression]: -2080 if not self._match(TokenType.UNNEST): -2081 return None +2073 values = self._parse_derived_table_values() +2074 +2075 if values: +2076 return values +2077 +2078 subquery = self._parse_select(table=True) +2079 +2080 if subquery: +2081 return subquery 2082 -2083 expressions = self._parse_wrapped_csv(self._parse_column) -2084 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) -2085 alias = self._parse_table_alias() -2086 -2087 if alias and self.unnest_column_only: -2088 if alias.args.get("columns"): -2089 self.raise_error("Unexpected extra column alias in unnest.") -2090 alias.set("columns", [alias.this]) -2091 alias.set("this", None) +2083 this = self._parse_table_parts(schema=schema) +2084 +2085 if schema: +2086 return self._parse_schema(this=this) +2087 +2088 if self.alias_post_tablesample: +2089 table_sample = self._parse_table_sample() +2090 +2091 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2092 -2093 offset = None -2094 if self._match_pair(TokenType.WITH, TokenType.OFFSET): -2095 self._match(TokenType.ALIAS) -2096 offset = self._parse_conjunction() -2097 -2098 return self.expression( -2099 exp.Unnest, -2100 expressions=expressions, -2101 ordinality=ordinality, -2102 alias=alias, -2103 offset=offset, -2104 ) +2093 if alias: +2094 this.set("alias", alias) +2095 +2096 if not this.args.get("pivots"): +2097 this.set("pivots", self._parse_pivots()) +2098 +2099 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): +2100 this.set( +2101 "hints", +2102 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), +2103 ) +2104 self._match_r_paren() 2105 -2106 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: -2107 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) -2108 if not is_derived and not self._match(TokenType.VALUES): -2109 return None -2110 -2111 expressions = self._parse_csv(self._parse_value) +2106 if not self.alias_post_tablesample: +2107 table_sample = self._parse_table_sample() +2108 +2109 if table_sample: +2110 table_sample.set("this", this) +2111 this = table_sample 2112 -2113 if is_derived: -2114 self._match_r_paren() -2115 -2116 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) -2117 -2118 def _parse_table_sample(self) -> t.Optional[exp.Expression]: -2119 if not self._match(TokenType.TABLE_SAMPLE): -2120 return None -2121 -2122 method = self._parse_var() -2123 bucket_numerator = None -2124 bucket_denominator = None -2125 bucket_field = None -2126 percent = None -2127 rows = None -2128 size = None -2129 seed = None -2130 -2131 self._match_l_paren() -2132 -2133 if self._match(TokenType.BUCKET): -2134 bucket_numerator = self._parse_number() -2135 self._match(TokenType.OUT_OF) -2136 bucket_denominator = bucket_denominator = self._parse_number() -2137 self._match(TokenType.ON) -2138 bucket_field = self._parse_field() -2139 else: -2140 num = self._parse_number() +2113 return this +2114 +2115 def _parse_unnest(self) -> t.Optional[exp.Expression]: +2116 if not self._match(TokenType.UNNEST): +2117 return None +2118 +2119 expressions = self._parse_wrapped_csv(self._parse_column) +2120 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) +2121 alias = self._parse_table_alias() +2122 +2123 if alias and self.unnest_column_only: +2124 if alias.args.get("columns"): +2125 self.raise_error("Unexpected extra column alias in unnest.") +2126 alias.set("columns", [alias.this]) +2127 alias.set("this", None) +2128 +2129 offset = None +2130 if self._match_pair(TokenType.WITH, TokenType.OFFSET): +2131 self._match(TokenType.ALIAS) +2132 offset = self._parse_conjunction() +2133 +2134 return self.expression( +2135 exp.Unnest, +2136 expressions=expressions, +2137 ordinality=ordinality, +2138 alias=alias, +2139 offset=offset, +2140 ) 2141 -2142 if self._match(TokenType.PERCENT): -2143 percent = num -2144 elif self._match(TokenType.ROWS): -2145 rows = num -2146 else: -2147 size = num +2142 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: +2143 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) +2144 if not is_derived and not self._match(TokenType.VALUES): +2145 return None +2146 +2147 expressions = self._parse_csv(self._parse_value) 2148 -2149 self._match_r_paren() -2150 -2151 if self._match(TokenType.SEED): -2152 seed = self._parse_wrapped(self._parse_number) +2149 if is_derived: +2150 self._match_r_paren() +2151 +2152 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2153 -2154 return self.expression( -2155 exp.TableSample, -2156 method=method, -2157 bucket_numerator=bucket_numerator, -2158 bucket_denominator=bucket_denominator, -2159 bucket_field=bucket_field, -2160 percent=percent, -2161 rows=rows, -2162 size=size, -2163 seed=seed, -2164 ) -2165 -2166 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: -2167 return list(iter(self._parse_pivot, None)) +2154 def _parse_table_sample(self) -> t.Optional[exp.Expression]: +2155 if not self._match(TokenType.TABLE_SAMPLE): +2156 return None +2157 +2158 method = self._parse_var() +2159 bucket_numerator = None +2160 bucket_denominator = None +2161 bucket_field = None +2162 percent = None +2163 rows = None +2164 size = None +2165 seed = None +2166 +2167 self._match_l_paren() 2168 -2169 def _parse_pivot(self) -> t.Optional[exp.Expression]: -2170 index = self._index -2171 -2172 if self._match(TokenType.PIVOT): -2173 unpivot = False -2174 elif self._match(TokenType.UNPIVOT): -2175 unpivot = True -2176 else: -2177 return None -2178 -2179 expressions = [] -2180 field = None -2181 -2182 if not self._match(TokenType.L_PAREN): -2183 self._retreat(index) -2184 return None -2185 -2186 if unpivot: -2187 expressions = self._parse_csv(self._parse_column) -2188 else: -2189 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) -2190 -2191 if not self._match(TokenType.FOR): -2192 self.raise_error("Expecting FOR") -2193 -2194 value = self._parse_column() -2195 -2196 if not self._match(TokenType.IN): -2197 self.raise_error("Expecting IN") -2198 -2199 field = self._parse_in(value) -2200 -2201 self._match_r_paren() -2202 -2203 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) +2169 if self._match(TokenType.BUCKET): +2170 bucket_numerator = self._parse_number() +2171 self._match(TokenType.OUT_OF) +2172 bucket_denominator = bucket_denominator = self._parse_number() +2173 self._match(TokenType.ON) +2174 bucket_field = self._parse_field() +2175 else: +2176 num = self._parse_number() +2177 +2178 if self._match(TokenType.PERCENT): +2179 percent = num +2180 elif self._match(TokenType.ROWS): +2181 rows = num +2182 else: +2183 size = num +2184 +2185 self._match_r_paren() +2186 +2187 if self._match(TokenType.SEED): +2188 seed = self._parse_wrapped(self._parse_number) +2189 +2190 return self.expression( +2191 exp.TableSample, +2192 method=method, +2193 bucket_numerator=bucket_numerator, +2194 bucket_denominator=bucket_denominator, +2195 bucket_field=bucket_field, +2196 percent=percent, +2197 rows=rows, +2198 size=size, +2199 seed=seed, +2200 ) +2201 +2202 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: +2203 return list(iter(self._parse_pivot, None)) 2204 -2205 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): -2206 pivot.set("alias", self._parse_table_alias()) +2205 def _parse_pivot(self) -> t.Optional[exp.Expression]: +2206 index = self._index 2207 -2208 return pivot -2209 -2210 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: -2211 if not skip_where_token and not self._match(TokenType.WHERE): -2212 return None -2213 -2214 return self.expression( -2215 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() -2216 ) +2208 if self._match(TokenType.PIVOT): +2209 unpivot = False +2210 elif self._match(TokenType.UNPIVOT): +2211 unpivot = True +2212 else: +2213 return None +2214 +2215 expressions = [] +2216 field = None 2217 -2218 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: -2219 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): +2218 if not self._match(TokenType.L_PAREN): +2219 self._retreat(index) 2220 return None 2221 -2222 elements = defaultdict(list) -2223 -2224 while True: -2225 expressions = self._parse_csv(self._parse_conjunction) -2226 if expressions: -2227 elements["expressions"].extend(expressions) -2228 -2229 grouping_sets = self._parse_grouping_sets() -2230 if grouping_sets: -2231 elements["grouping_sets"].extend(grouping_sets) -2232 -2233 rollup = None -2234 cube = None -2235 -2236 with_ = self._match(TokenType.WITH) -2237 if self._match(TokenType.ROLLUP): -2238 rollup = with_ or self._parse_wrapped_csv(self._parse_column) -2239 elements["rollup"].extend(ensure_list(rollup)) +2222 if unpivot: +2223 expressions = self._parse_csv(self._parse_column) +2224 else: +2225 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) +2226 +2227 if not self._match(TokenType.FOR): +2228 self.raise_error("Expecting FOR") +2229 +2230 value = self._parse_column() +2231 +2232 if not self._match(TokenType.IN): +2233 self.raise_error("Expecting IN") +2234 +2235 field = self._parse_in(value) +2236 +2237 self._match_r_paren() +2238 +2239 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2240 -2241 if self._match(TokenType.CUBE): -2242 cube = with_ or self._parse_wrapped_csv(self._parse_column) -2243 elements["cube"].extend(ensure_list(cube)) -2244 -2245 if not (expressions or grouping_sets or rollup or cube): -2246 break -2247 -2248 return self.expression(exp.Group, **elements) # type: ignore +2241 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): +2242 pivot.set("alias", self._parse_table_alias()) +2243 +2244 return pivot +2245 +2246 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: +2247 if not skip_where_token and not self._match(TokenType.WHERE): +2248 return None 2249 -2250 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -2251 if not self._match(TokenType.GROUPING_SETS): -2252 return None +2250 return self.expression( +2251 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() +2252 ) 2253 -2254 return self._parse_wrapped_csv(self._parse_grouping_set) -2255 -2256 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: -2257 if self._match(TokenType.L_PAREN): -2258 grouping_set = self._parse_csv(self._parse_column) -2259 self._match_r_paren() -2260 return self.expression(exp.Tuple, expressions=grouping_set) -2261 -2262 return self._parse_column() -2263 -2264 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: -2265 if not skip_having_token and not self._match(TokenType.HAVING): -2266 return None -2267 return self.expression(exp.Having, this=self._parse_conjunction()) +2254 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: +2255 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): +2256 return None +2257 +2258 elements = defaultdict(list) +2259 +2260 while True: +2261 expressions = self._parse_csv(self._parse_conjunction) +2262 if expressions: +2263 elements["expressions"].extend(expressions) +2264 +2265 grouping_sets = self._parse_grouping_sets() +2266 if grouping_sets: +2267 elements["grouping_sets"].extend(grouping_sets) 2268 -2269 def _parse_qualify(self) -> t.Optional[exp.Expression]: -2270 if not self._match(TokenType.QUALIFY): -2271 return None -2272 return self.expression(exp.Qualify, this=self._parse_conjunction()) -2273 -2274 def _parse_order( -2275 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False -2276 ) -> t.Optional[exp.Expression]: -2277 if not skip_order_token and not self._match(TokenType.ORDER_BY): -2278 return this -2279 -2280 return self.expression( -2281 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) -2282 ) +2269 rollup = None +2270 cube = None +2271 +2272 with_ = self._match(TokenType.WITH) +2273 if self._match(TokenType.ROLLUP): +2274 rollup = with_ or self._parse_wrapped_csv(self._parse_column) +2275 elements["rollup"].extend(ensure_list(rollup)) +2276 +2277 if self._match(TokenType.CUBE): +2278 cube = with_ or self._parse_wrapped_csv(self._parse_column) +2279 elements["cube"].extend(ensure_list(cube)) +2280 +2281 if not (expressions or grouping_sets or rollup or cube): +2282 break 2283 -2284 def _parse_sort( -2285 self, token_type: TokenType, exp_class: t.Type[exp.Expression] -2286 ) -> t.Optional[exp.Expression]: -2287 if not self._match(token_type): +2284 return self.expression(exp.Group, **elements) # type: ignore +2285 +2286 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +2287 if not self._match(TokenType.GROUPING_SETS): 2288 return None -2289 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) -2290 -2291 def _parse_ordered(self) -> exp.Expression: -2292 this = self._parse_conjunction() -2293 self._match(TokenType.ASC) -2294 is_desc = self._match(TokenType.DESC) -2295 is_nulls_first = self._match(TokenType.NULLS_FIRST) -2296 is_nulls_last = self._match(TokenType.NULLS_LAST) -2297 desc = is_desc or False -2298 asc = not desc -2299 nulls_first = is_nulls_first or False -2300 explicitly_null_ordered = is_nulls_first or is_nulls_last -2301 if ( -2302 not explicitly_null_ordered -2303 and ( -2304 (asc and self.null_ordering == "nulls_are_small") -2305 or (desc and self.null_ordering != "nulls_are_small") -2306 ) -2307 and self.null_ordering != "nulls_are_last" -2308 ): -2309 nulls_first = True -2310 -2311 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) -2312 -2313 def _parse_limit( -2314 self, this: t.Optional[exp.Expression] = None, top: bool = False -2315 ) -> t.Optional[exp.Expression]: -2316 if self._match(TokenType.TOP if top else TokenType.LIMIT): -2317 limit_paren = self._match(TokenType.L_PAREN) -2318 limit_exp = self.expression( -2319 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() -2320 ) -2321 -2322 if limit_paren: -2323 self._match_r_paren() -2324 -2325 return limit_exp +2289 +2290 return self._parse_wrapped_csv(self._parse_grouping_set) +2291 +2292 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: +2293 if self._match(TokenType.L_PAREN): +2294 grouping_set = self._parse_csv(self._parse_column) +2295 self._match_r_paren() +2296 return self.expression(exp.Tuple, expressions=grouping_set) +2297 +2298 return self._parse_column() +2299 +2300 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: +2301 if not skip_having_token and not self._match(TokenType.HAVING): +2302 return None +2303 return self.expression(exp.Having, this=self._parse_conjunction()) +2304 +2305 def _parse_qualify(self) -> t.Optional[exp.Expression]: +2306 if not self._match(TokenType.QUALIFY): +2307 return None +2308 return self.expression(exp.Qualify, this=self._parse_conjunction()) +2309 +2310 def _parse_order( +2311 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False +2312 ) -> t.Optional[exp.Expression]: +2313 if not skip_order_token and not self._match(TokenType.ORDER_BY): +2314 return this +2315 +2316 return self.expression( +2317 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) +2318 ) +2319 +2320 def _parse_sort( +2321 self, token_type: TokenType, exp_class: t.Type[exp.Expression] +2322 ) -> t.Optional[exp.Expression]: +2323 if not self._match(token_type): +2324 return None +2325 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2326 -2327 if self._match(TokenType.FETCH): -2328 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) -2329 direction = self._prev.text if direction else "FIRST" -2330 count = self._parse_number() -2331 self._match_set((TokenType.ROW, TokenType.ROWS)) -2332 self._match(TokenType.ONLY) -2333 return self.expression(exp.Fetch, direction=direction, count=count) -2334 -2335 return this -2336 -2337 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: -2338 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): -2339 return this -2340 -2341 count = self._parse_number() -2342 self._match_set((TokenType.ROW, TokenType.ROWS)) -2343 return self.expression(exp.Offset, this=this, expression=count) -2344 -2345 def _parse_lock(self) -> t.Optional[exp.Expression]: -2346 if self._match_text_seq("FOR", "UPDATE"): -2347 return self.expression(exp.Lock, update=True) -2348 if self._match_text_seq("FOR", "SHARE"): -2349 return self.expression(exp.Lock, update=False) -2350 -2351 return None -2352 -2353 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2354 if not self._match_set(self.SET_OPERATIONS): -2355 return this -2356 -2357 token_type = self._prev.token_type -2358 -2359 if token_type == TokenType.UNION: -2360 expression = exp.Union -2361 elif token_type == TokenType.EXCEPT: -2362 expression = exp.Except -2363 else: -2364 expression = exp.Intersect -2365 -2366 return self.expression( -2367 expression, -2368 this=this, -2369 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), -2370 expression=self._parse_set_operations(self._parse_select(nested=True)), -2371 ) +2327 def _parse_ordered(self) -> exp.Expression: +2328 this = self._parse_conjunction() +2329 self._match(TokenType.ASC) +2330 is_desc = self._match(TokenType.DESC) +2331 is_nulls_first = self._match(TokenType.NULLS_FIRST) +2332 is_nulls_last = self._match(TokenType.NULLS_LAST) +2333 desc = is_desc or False +2334 asc = not desc +2335 nulls_first = is_nulls_first or False +2336 explicitly_null_ordered = is_nulls_first or is_nulls_last +2337 if ( +2338 not explicitly_null_ordered +2339 and ( +2340 (asc and self.null_ordering == "nulls_are_small") +2341 or (desc and self.null_ordering != "nulls_are_small") +2342 ) +2343 and self.null_ordering != "nulls_are_last" +2344 ): +2345 nulls_first = True +2346 +2347 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) +2348 +2349 def _parse_limit( +2350 self, this: t.Optional[exp.Expression] = None, top: bool = False +2351 ) -> t.Optional[exp.Expression]: +2352 if self._match(TokenType.TOP if top else TokenType.LIMIT): +2353 limit_paren = self._match(TokenType.L_PAREN) +2354 limit_exp = self.expression( +2355 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() +2356 ) +2357 +2358 if limit_paren: +2359 self._match_r_paren() +2360 +2361 return limit_exp +2362 +2363 if self._match(TokenType.FETCH): +2364 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) +2365 direction = self._prev.text if direction else "FIRST" +2366 count = self._parse_number() +2367 self._match_set((TokenType.ROW, TokenType.ROWS)) +2368 self._match(TokenType.ONLY) +2369 return self.expression(exp.Fetch, direction=direction, count=count) +2370 +2371 return this 2372 -2373 def _parse_expression(self) -> t.Optional[exp.Expression]: -2374 return self._parse_alias(self._parse_conjunction()) -2375 -2376 def _parse_conjunction(self) -> t.Optional[exp.Expression]: -2377 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) -2378 -2379 def _parse_equality(self) -> t.Optional[exp.Expression]: -2380 return self._parse_tokens(self._parse_comparison, self.EQUALITY) -2381 -2382 def _parse_comparison(self) -> t.Optional[exp.Expression]: -2383 return self._parse_tokens(self._parse_range, self.COMPARISON) -2384 -2385 def _parse_range(self) -> t.Optional[exp.Expression]: -2386 this = self._parse_bitwise() -2387 negate = self._match(TokenType.NOT) +2373 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: +2374 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): +2375 return this +2376 +2377 count = self._parse_number() +2378 self._match_set((TokenType.ROW, TokenType.ROWS)) +2379 return self.expression(exp.Offset, this=this, expression=count) +2380 +2381 def _parse_lock(self) -> t.Optional[exp.Expression]: +2382 if self._match_text_seq("FOR", "UPDATE"): +2383 return self.expression(exp.Lock, update=True) +2384 if self._match_text_seq("FOR", "SHARE"): +2385 return self.expression(exp.Lock, update=False) +2386 +2387 return None 2388 -2389 if self._match_set(self.RANGE_PARSERS): -2390 this = self.RANGE_PARSERS[self._prev.token_type](self, this) -2391 elif self._match(TokenType.ISNULL): -2392 this = self.expression(exp.Is, this=this, expression=exp.Null()) -2393 -2394 # Postgres supports ISNULL and NOTNULL for conditions. -2395 # https://blog.andreiavram.ro/postgresql-null-composite-type/ -2396 if self._match(TokenType.NOTNULL): -2397 this = self.expression(exp.Is, this=this, expression=exp.Null()) -2398 this = self.expression(exp.Not, this=this) -2399 -2400 if negate: -2401 this = self.expression(exp.Not, this=this) -2402 -2403 if self._match(TokenType.IS): -2404 this = self._parse_is(this) -2405 -2406 return this -2407 -2408 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: -2409 negate = self._match(TokenType.NOT) -2410 if self._match(TokenType.DISTINCT_FROM): -2411 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ -2412 return self.expression(klass, this=this, expression=self._parse_expression()) -2413 -2414 this = self.expression( -2415 exp.Is, -2416 this=this, -2417 expression=self._parse_null() or self._parse_boolean(), -2418 ) -2419 return self.expression(exp.Not, this=this) if negate else this +2389 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2390 if not self._match_set(self.SET_OPERATIONS): +2391 return this +2392 +2393 token_type = self._prev.token_type +2394 +2395 if token_type == TokenType.UNION: +2396 expression = exp.Union +2397 elif token_type == TokenType.EXCEPT: +2398 expression = exp.Except +2399 else: +2400 expression = exp.Intersect +2401 +2402 return self.expression( +2403 expression, +2404 this=this, +2405 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), +2406 expression=self._parse_set_operations(self._parse_select(nested=True)), +2407 ) +2408 +2409 def _parse_expression(self) -> t.Optional[exp.Expression]: +2410 return self._parse_alias(self._parse_conjunction()) +2411 +2412 def _parse_conjunction(self) -> t.Optional[exp.Expression]: +2413 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) +2414 +2415 def _parse_equality(self) -> t.Optional[exp.Expression]: +2416 return self._parse_tokens(self._parse_comparison, self.EQUALITY) +2417 +2418 def _parse_comparison(self) -> t.Optional[exp.Expression]: +2419 return self._parse_tokens(self._parse_range, self.COMPARISON) 2420 -2421 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: -2422 unnest = self._parse_unnest() -2423 if unnest: -2424 this = self.expression(exp.In, this=this, unnest=unnest) -2425 elif self._match(TokenType.L_PAREN): -2426 expressions = self._parse_csv(self._parse_select_or_expression) -2427 -2428 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): -2429 this = self.expression(exp.In, this=this, query=expressions[0]) -2430 else: -2431 this = self.expression(exp.In, this=this, expressions=expressions) -2432 -2433 self._match_r_paren() -2434 else: -2435 this = self.expression(exp.In, this=this, field=self._parse_field()) -2436 -2437 return this +2421 def _parse_range(self) -> t.Optional[exp.Expression]: +2422 this = self._parse_bitwise() +2423 negate = self._match(TokenType.NOT) +2424 +2425 if self._match_set(self.RANGE_PARSERS): +2426 this = self.RANGE_PARSERS[self._prev.token_type](self, this) +2427 elif self._match(TokenType.ISNULL): +2428 this = self.expression(exp.Is, this=this, expression=exp.Null()) +2429 +2430 # Postgres supports ISNULL and NOTNULL for conditions. +2431 # https://blog.andreiavram.ro/postgresql-null-composite-type/ +2432 if self._match(TokenType.NOTNULL): +2433 this = self.expression(exp.Is, this=this, expression=exp.Null()) +2434 this = self.expression(exp.Not, this=this) +2435 +2436 if negate: +2437 this = self.expression(exp.Not, this=this) 2438 -2439 def _parse_between(self, this: exp.Expression) -> exp.Expression: -2440 low = self._parse_bitwise() -2441 self._match(TokenType.AND) -2442 high = self._parse_bitwise() -2443 return self.expression(exp.Between, this=this, low=low, high=high) -2444 -2445 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2446 if not self._match(TokenType.ESCAPE): -2447 return this -2448 return self.expression(exp.Escape, this=this, expression=self._parse_string()) +2439 if self._match(TokenType.IS): +2440 this = self._parse_is(this) +2441 +2442 return this +2443 +2444 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: +2445 negate = self._match(TokenType.NOT) +2446 if self._match(TokenType.DISTINCT_FROM): +2447 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ +2448 return self.expression(klass, this=this, expression=self._parse_expression()) 2449 -2450 def _parse_bitwise(self) -> t.Optional[exp.Expression]: -2451 this = self._parse_term() -2452 -2453 while True: -2454 if self._match_set(self.BITWISE): -2455 this = self.expression( -2456 self.BITWISE[self._prev.token_type], -2457 this=this, -2458 expression=self._parse_term(), -2459 ) -2460 elif self._match_pair(TokenType.LT, TokenType.LT): -2461 this = self.expression( -2462 exp.BitwiseLeftShift, this=this, expression=self._parse_term() -2463 ) -2464 elif self._match_pair(TokenType.GT, TokenType.GT): -2465 this = self.expression( -2466 exp.BitwiseRightShift, this=this, expression=self._parse_term() -2467 ) -2468 else: -2469 break -2470 -2471 return this +2450 this = self.expression( +2451 exp.Is, +2452 this=this, +2453 expression=self._parse_null() or self._parse_boolean(), +2454 ) +2455 return self.expression(exp.Not, this=this) if negate else this +2456 +2457 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: +2458 unnest = self._parse_unnest() +2459 if unnest: +2460 this = self.expression(exp.In, this=this, unnest=unnest) +2461 elif self._match(TokenType.L_PAREN): +2462 expressions = self._parse_csv(self._parse_select_or_expression) +2463 +2464 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): +2465 this = self.expression(exp.In, this=this, query=expressions[0]) +2466 else: +2467 this = self.expression(exp.In, this=this, expressions=expressions) +2468 +2469 self._match_r_paren() +2470 else: +2471 this = self.expression(exp.In, this=this, field=self._parse_field()) 2472 -2473 def _parse_term(self) -> t.Optional[exp.Expression]: -2474 return self._parse_tokens(self._parse_factor, self.TERM) -2475 -2476 def _parse_factor(self) -> t.Optional[exp.Expression]: -2477 return self._parse_tokens(self._parse_unary, self.FACTOR) -2478 -2479 def _parse_unary(self) -> t.Optional[exp.Expression]: -2480 if self._match_set(self.UNARY_PARSERS): -2481 return self.UNARY_PARSERS[self._prev.token_type](self) -2482 return self._parse_at_time_zone(self._parse_type()) -2483 -2484 def _parse_type(self) -> t.Optional[exp.Expression]: -2485 if self._match(TokenType.INTERVAL): -2486 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) -2487 -2488 index = self._index -2489 type_token = self._parse_types(check_func=True) -2490 this = self._parse_column() -2491 -2492 if type_token: -2493 if this and not isinstance(this, exp.Star): -2494 return self.expression(exp.Cast, this=this, to=type_token) -2495 if not type_token.args.get("expressions"): -2496 self._retreat(index) -2497 return self._parse_column() -2498 return type_token -2499 -2500 return this -2501 -2502 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: -2503 index = self._index -2504 -2505 prefix = self._match_text_seq("SYSUDTLIB", ".") +2473 return this +2474 +2475 def _parse_between(self, this: exp.Expression) -> exp.Expression: +2476 low = self._parse_bitwise() +2477 self._match(TokenType.AND) +2478 high = self._parse_bitwise() +2479 return self.expression(exp.Between, this=this, low=low, high=high) +2480 +2481 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2482 if not self._match(TokenType.ESCAPE): +2483 return this +2484 return self.expression(exp.Escape, this=this, expression=self._parse_string()) +2485 +2486 def _parse_bitwise(self) -> t.Optional[exp.Expression]: +2487 this = self._parse_term() +2488 +2489 while True: +2490 if self._match_set(self.BITWISE): +2491 this = self.expression( +2492 self.BITWISE[self._prev.token_type], +2493 this=this, +2494 expression=self._parse_term(), +2495 ) +2496 elif self._match_pair(TokenType.LT, TokenType.LT): +2497 this = self.expression( +2498 exp.BitwiseLeftShift, this=this, expression=self._parse_term() +2499 ) +2500 elif self._match_pair(TokenType.GT, TokenType.GT): +2501 this = self.expression( +2502 exp.BitwiseRightShift, this=this, expression=self._parse_term() +2503 ) +2504 else: +2505 break 2506 -2507 if not self._match_set(self.TYPE_TOKENS): -2508 return None -2509 -2510 type_token = self._prev.token_type +2507 return this +2508 +2509 def _parse_term(self) -> t.Optional[exp.Expression]: +2510 return self._parse_tokens(self._parse_factor, self.TERM) 2511 -2512 if type_token == TokenType.PSEUDO_TYPE: -2513 return self.expression(exp.PseudoType, this=self._prev.text) +2512 def _parse_factor(self) -> t.Optional[exp.Expression]: +2513 return self._parse_tokens(self._parse_unary, self.FACTOR) 2514 -2515 nested = type_token in self.NESTED_TYPE_TOKENS -2516 is_struct = type_token == TokenType.STRUCT -2517 expressions = None -2518 maybe_func = False +2515 def _parse_unary(self) -> t.Optional[exp.Expression]: +2516 if self._match_set(self.UNARY_PARSERS): +2517 return self.UNARY_PARSERS[self._prev.token_type](self) +2518 return self._parse_at_time_zone(self._parse_type()) 2519 -2520 if self._match(TokenType.L_PAREN): -2521 if is_struct: -2522 expressions = self._parse_csv(self._parse_struct_kwargs) -2523 elif nested: -2524 expressions = self._parse_csv(self._parse_types) -2525 else: -2526 expressions = self._parse_csv(self._parse_conjunction) +2520 def _parse_type(self) -> t.Optional[exp.Expression]: +2521 if self._match(TokenType.INTERVAL): +2522 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) +2523 +2524 index = self._index +2525 type_token = self._parse_types(check_func=True) +2526 this = self._parse_column() 2527 -2528 if not expressions: -2529 self._retreat(index) -2530 return None -2531 -2532 self._match_r_paren() -2533 maybe_func = True -2534 -2535 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): -2536 this = exp.DataType( -2537 this=exp.DataType.Type.ARRAY, -2538 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], -2539 nested=True, -2540 ) -2541 -2542 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): -2543 this = exp.DataType( -2544 this=exp.DataType.Type.ARRAY, -2545 expressions=[this], -2546 nested=True, -2547 ) -2548 -2549 return this +2528 if type_token: +2529 if this and not isinstance(this, exp.Star): +2530 return self.expression(exp.Cast, this=this, to=type_token) +2531 if not type_token.args.get("expressions"): +2532 self._retreat(index) +2533 return self._parse_column() +2534 return type_token +2535 +2536 return this +2537 +2538 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: +2539 index = self._index +2540 +2541 prefix = self._match_text_seq("SYSUDTLIB", ".") +2542 +2543 if not self._match_set(self.TYPE_TOKENS): +2544 return None +2545 +2546 type_token = self._prev.token_type +2547 +2548 if type_token == TokenType.PSEUDO_TYPE: +2549 return self.expression(exp.PseudoType, this=self._prev.text) 2550 -2551 if self._match(TokenType.L_BRACKET): -2552 self._retreat(index) -2553 return None -2554 -2555 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None -2556 if nested and self._match(TokenType.LT): +2551 nested = type_token in self.NESTED_TYPE_TOKENS +2552 is_struct = type_token == TokenType.STRUCT +2553 expressions = None +2554 maybe_func = False +2555 +2556 if self._match(TokenType.L_PAREN): 2557 if is_struct: 2558 expressions = self._parse_csv(self._parse_struct_kwargs) -2559 else: +2559 elif nested: 2560 expressions = self._parse_csv(self._parse_types) -2561 -2562 if not self._match(TokenType.GT): -2563 self.raise_error("Expecting >") -2564 -2565 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): -2566 values = self._parse_csv(self._parse_conjunction) -2567 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) -2568 -2569 value: t.Optional[exp.Expression] = None -2570 if type_token in self.TIMESTAMPS: -2571 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: -2572 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) -2573 elif ( -2574 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ -2575 ): -2576 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) -2577 elif self._match(TokenType.WITHOUT_TIME_ZONE): -2578 if type_token == TokenType.TIME: -2579 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) -2580 else: -2581 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) -2582 -2583 maybe_func = maybe_func and value is None +2561 else: +2562 expressions = self._parse_csv(self._parse_conjunction) +2563 +2564 if not expressions: +2565 self._retreat(index) +2566 return None +2567 +2568 self._match_r_paren() +2569 maybe_func = True +2570 +2571 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): +2572 this = exp.DataType( +2573 this=exp.DataType.Type.ARRAY, +2574 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], +2575 nested=True, +2576 ) +2577 +2578 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): +2579 this = exp.DataType( +2580 this=exp.DataType.Type.ARRAY, +2581 expressions=[this], +2582 nested=True, +2583 ) 2584 -2585 if value is None: -2586 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) -2587 elif type_token == TokenType.INTERVAL: -2588 value = self.expression(exp.Interval, unit=self._parse_var()) -2589 -2590 if maybe_func and check_func: -2591 index2 = self._index -2592 peek = self._parse_string() -2593 -2594 if not peek: -2595 self._retreat(index) -2596 return None +2585 return this +2586 +2587 if self._match(TokenType.L_BRACKET): +2588 self._retreat(index) +2589 return None +2590 +2591 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None +2592 if nested and self._match(TokenType.LT): +2593 if is_struct: +2594 expressions = self._parse_csv(self._parse_struct_kwargs) +2595 else: +2596 expressions = self._parse_csv(self._parse_types) 2597 -2598 self._retreat(index2) -2599 -2600 if value: -2601 return value -2602 -2603 return exp.DataType( -2604 this=exp.DataType.Type[type_token.value.upper()], -2605 expressions=expressions, -2606 nested=nested, -2607 values=values, -2608 prefix=prefix, -2609 ) -2610 -2611 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: -2612 if self._curr and self._curr.token_type in self.TYPE_TOKENS: -2613 return self._parse_types() -2614 -2615 this = self._parse_id_var() -2616 self._match(TokenType.COLON) -2617 data_type = self._parse_types() +2598 if not self._match(TokenType.GT): +2599 self.raise_error("Expecting >") +2600 +2601 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): +2602 values = self._parse_csv(self._parse_conjunction) +2603 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) +2604 +2605 value: t.Optional[exp.Expression] = None +2606 if type_token in self.TIMESTAMPS: +2607 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: +2608 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) +2609 elif ( +2610 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ +2611 ): +2612 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) +2613 elif self._match(TokenType.WITHOUT_TIME_ZONE): +2614 if type_token == TokenType.TIME: +2615 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) +2616 else: +2617 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2618 -2619 if not data_type: -2620 return None -2621 return self.expression(exp.StructKwarg, this=this, expression=data_type) -2622 -2623 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2624 if not self._match(TokenType.AT_TIME_ZONE): -2625 return this -2626 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) -2627 -2628 def _parse_column(self) -> t.Optional[exp.Expression]: -2629 this = self._parse_field() -2630 if isinstance(this, exp.Identifier): -2631 this = self.expression(exp.Column, this=this) -2632 elif not this: -2633 return self._parse_bracket(this) -2634 this = self._parse_bracket(this) +2619 maybe_func = maybe_func and value is None +2620 +2621 if value is None: +2622 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) +2623 elif type_token == TokenType.INTERVAL: +2624 value = self.expression(exp.Interval, unit=self._parse_var()) +2625 +2626 if maybe_func and check_func: +2627 index2 = self._index +2628 peek = self._parse_string() +2629 +2630 if not peek: +2631 self._retreat(index) +2632 return None +2633 +2634 self._retreat(index2) 2635 -2636 while self._match_set(self.COLUMN_OPERATORS): -2637 op_token = self._prev.token_type -2638 op = self.COLUMN_OPERATORS.get(op_token) -2639 -2640 if op_token == TokenType.DCOLON: -2641 field = self._parse_types() -2642 if not field: -2643 self.raise_error("Expected type") -2644 elif op: -2645 self._advance() -2646 value = self._prev.text -2647 field = ( -2648 exp.Literal.number(value) -2649 if self._prev.token_type == TokenType.NUMBER -2650 else exp.Literal.string(value) -2651 ) -2652 else: -2653 field = self._parse_star() or self._parse_function() or self._parse_id_var() +2636 if value: +2637 return value +2638 +2639 return exp.DataType( +2640 this=exp.DataType.Type[type_token.value.upper()], +2641 expressions=expressions, +2642 nested=nested, +2643 values=values, +2644 prefix=prefix, +2645 ) +2646 +2647 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: +2648 if self._curr and self._curr.token_type in self.TYPE_TOKENS: +2649 return self._parse_types() +2650 +2651 this = self._parse_id_var() +2652 self._match(TokenType.COLON) +2653 data_type = self._parse_types() 2654 -2655 if isinstance(field, exp.Func): -2656 # bigquery allows function calls like x.y.count(...) -2657 # SAFE.SUBSTR(...) -2658 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules -2659 this = self._replace_columns_with_dots(this) -2660 -2661 if op: -2662 this = op(self, this, field) -2663 elif isinstance(this, exp.Column) and not this.args.get("catalog"): -2664 this = self.expression( -2665 exp.Column, -2666 this=field, -2667 table=this.this, -2668 db=this.args.get("table"), -2669 catalog=this.args.get("db"), -2670 ) -2671 else: -2672 this = self.expression(exp.Dot, this=this, expression=field) -2673 this = self._parse_bracket(this) -2674 -2675 return this -2676 -2677 def _parse_primary(self) -> t.Optional[exp.Expression]: -2678 if self._match_set(self.PRIMARY_PARSERS): -2679 token_type = self._prev.token_type -2680 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) -2681 -2682 if token_type == TokenType.STRING: -2683 expressions = [primary] -2684 while self._match(TokenType.STRING): -2685 expressions.append(exp.Literal.string(self._prev.text)) -2686 if len(expressions) > 1: -2687 return self.expression(exp.Concat, expressions=expressions) -2688 return primary -2689 -2690 if self._match_pair(TokenType.DOT, TokenType.NUMBER): -2691 return exp.Literal.number(f"0.{self._prev.text}") -2692 -2693 if self._match(TokenType.L_PAREN): -2694 comments = self._prev_comments -2695 query = self._parse_select() +2655 if not data_type: +2656 return None +2657 return self.expression(exp.StructKwarg, this=this, expression=data_type) +2658 +2659 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2660 if not self._match(TokenType.AT_TIME_ZONE): +2661 return this +2662 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) +2663 +2664 def _parse_column(self) -> t.Optional[exp.Expression]: +2665 this = self._parse_field() +2666 if isinstance(this, exp.Identifier): +2667 this = self.expression(exp.Column, this=this) +2668 elif not this: +2669 return self._parse_bracket(this) +2670 this = self._parse_bracket(this) +2671 +2672 while self._match_set(self.COLUMN_OPERATORS): +2673 op_token = self._prev.token_type +2674 op = self.COLUMN_OPERATORS.get(op_token) +2675 +2676 if op_token == TokenType.DCOLON: +2677 field = self._parse_types() +2678 if not field: +2679 self.raise_error("Expected type") +2680 elif op: +2681 self._advance() +2682 value = self._prev.text +2683 field = ( +2684 exp.Literal.number(value) +2685 if self._prev.token_type == TokenType.NUMBER +2686 else exp.Literal.string(value) +2687 ) +2688 else: +2689 field = self._parse_star() or self._parse_function() or self._parse_id_var() +2690 +2691 if isinstance(field, exp.Func): +2692 # bigquery allows function calls like x.y.count(...) +2693 # SAFE.SUBSTR(...) +2694 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules +2695 this = self._replace_columns_with_dots(this) 2696 -2697 if query: -2698 expressions = [query] -2699 else: -2700 expressions = self._parse_csv( -2701 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) -2702 ) -2703 -2704 this = seq_get(expressions, 0) -2705 self._parse_query_modifiers(this) -2706 self._match_r_paren() -2707 -2708 if isinstance(this, exp.Subqueryable): -2709 this = self._parse_set_operations( -2710 self._parse_subquery(this=this, parse_alias=False) -2711 ) -2712 elif len(expressions) > 1: -2713 this = self.expression(exp.Tuple, expressions=expressions) -2714 else: -2715 this = self.expression(exp.Paren, this=this) -2716 -2717 if this and comments: -2718 this.comments = comments -2719 -2720 return this -2721 -2722 return None -2723 -2724 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: -2725 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) -2726 -2727 def _parse_function( -2728 self, functions: t.Optional[t.Dict[str, t.Callable]] = None -2729 ) -> t.Optional[exp.Expression]: -2730 if not self._curr: -2731 return None +2697 if op: +2698 this = op(self, this, field) +2699 elif isinstance(this, exp.Column) and not this.args.get("catalog"): +2700 this = self.expression( +2701 exp.Column, +2702 this=field, +2703 table=this.this, +2704 db=this.args.get("table"), +2705 catalog=this.args.get("db"), +2706 ) +2707 else: +2708 this = self.expression(exp.Dot, this=this, expression=field) +2709 this = self._parse_bracket(this) +2710 +2711 return this +2712 +2713 def _parse_primary(self) -> t.Optional[exp.Expression]: +2714 if self._match_set(self.PRIMARY_PARSERS): +2715 token_type = self._prev.token_type +2716 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) +2717 +2718 if token_type == TokenType.STRING: +2719 expressions = [primary] +2720 while self._match(TokenType.STRING): +2721 expressions.append(exp.Literal.string(self._prev.text)) +2722 if len(expressions) > 1: +2723 return self.expression(exp.Concat, expressions=expressions) +2724 return primary +2725 +2726 if self._match_pair(TokenType.DOT, TokenType.NUMBER): +2727 return exp.Literal.number(f"0.{self._prev.text}") +2728 +2729 if self._match(TokenType.L_PAREN): +2730 comments = self._prev_comments +2731 query = self._parse_select() 2732 -2733 token_type = self._curr.token_type -2734 -2735 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): -2736 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) -2737 -2738 if not self._next or self._next.token_type != TokenType.L_PAREN: -2739 if token_type in self.NO_PAREN_FUNCTIONS: -2740 self._advance() -2741 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) -2742 -2743 return None -2744 -2745 if token_type not in self.FUNC_TOKENS: -2746 return None -2747 -2748 this = self._curr.text -2749 upper = this.upper() -2750 self._advance(2) -2751 -2752 parser = self.FUNCTION_PARSERS.get(upper) -2753 -2754 if parser: -2755 this = parser(self) -2756 else: -2757 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) -2758 -2759 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): -2760 this = self.expression(subquery_predicate, this=self._parse_select()) -2761 self._match_r_paren() -2762 return this -2763 -2764 if functions is None: -2765 functions = self.FUNCTIONS -2766 -2767 function = functions.get(upper) -2768 args = self._parse_csv(self._parse_lambda) -2769 -2770 if function: -2771 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the -2772 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. -2773 if count_params(function) == 2: -2774 params = None -2775 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): -2776 params = self._parse_csv(self._parse_lambda) -2777 -2778 this = function(args, params) -2779 else: -2780 this = function(args) -2781 -2782 self.validate_expression(this, args) -2783 else: -2784 this = self.expression(exp.Anonymous, this=this, expressions=args) -2785 -2786 self._match_r_paren(this) -2787 return self._parse_window(this) -2788 -2789 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: -2790 return self._parse_column_def(self._parse_id_var()) -2791 -2792 def _parse_user_defined_function( -2793 self, kind: t.Optional[TokenType] = None -2794 ) -> t.Optional[exp.Expression]: -2795 this = self._parse_id_var() -2796 -2797 while self._match(TokenType.DOT): -2798 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) +2733 if query: +2734 expressions = [query] +2735 else: +2736 expressions = self._parse_csv( +2737 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) +2738 ) +2739 +2740 this = seq_get(expressions, 0) +2741 self._parse_query_modifiers(this) +2742 self._match_r_paren() +2743 +2744 if isinstance(this, exp.Subqueryable): +2745 this = self._parse_set_operations( +2746 self._parse_subquery(this=this, parse_alias=False) +2747 ) +2748 elif len(expressions) > 1: +2749 this = self.expression(exp.Tuple, expressions=expressions) +2750 else: +2751 this = self.expression(exp.Paren, this=this) +2752 +2753 if this and comments: +2754 this.comments = comments +2755 +2756 return this +2757 +2758 return None +2759 +2760 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: +2761 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) +2762 +2763 def _parse_function( +2764 self, functions: t.Optional[t.Dict[str, t.Callable]] = None +2765 ) -> t.Optional[exp.Expression]: +2766 if not self._curr: +2767 return None +2768 +2769 token_type = self._curr.token_type +2770 +2771 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): +2772 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) +2773 +2774 if not self._next or self._next.token_type != TokenType.L_PAREN: +2775 if token_type in self.NO_PAREN_FUNCTIONS: +2776 self._advance() +2777 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) +2778 +2779 return None +2780 +2781 if token_type not in self.FUNC_TOKENS: +2782 return None +2783 +2784 this = self._curr.text +2785 upper = this.upper() +2786 self._advance(2) +2787 +2788 parser = self.FUNCTION_PARSERS.get(upper) +2789 +2790 if parser: +2791 this = parser(self) +2792 else: +2793 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) +2794 +2795 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): +2796 this = self.expression(subquery_predicate, this=self._parse_select()) +2797 self._match_r_paren() +2798 return this 2799 -2800 if not self._match(TokenType.L_PAREN): -2801 return this +2800 if functions is None: +2801 functions = self.FUNCTIONS 2802 -2803 expressions = self._parse_csv(self._parse_function_parameter) -2804 self._match_r_paren() -2805 return self.expression( -2806 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True -2807 ) -2808 -2809 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: -2810 literal = self._parse_primary() -2811 if literal: -2812 return self.expression(exp.Introducer, this=token.text, expression=literal) +2803 function = functions.get(upper) +2804 args = self._parse_csv(self._parse_lambda) +2805 +2806 if function: +2807 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the +2808 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. +2809 if count_params(function) == 2: +2810 params = None +2811 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): +2812 params = self._parse_csv(self._parse_lambda) 2813 -2814 return self.expression(exp.Identifier, this=token.text) -2815 -2816 def _parse_national(self, token: Token) -> exp.Expression: -2817 return self.expression(exp.National, this=exp.Literal.string(token.text)) -2818 -2819 def _parse_session_parameter(self) -> exp.Expression: -2820 kind = None -2821 this = self._parse_id_var() or self._parse_primary() -2822 -2823 if this and self._match(TokenType.DOT): -2824 kind = this.name -2825 this = self._parse_var() or self._parse_primary() -2826 -2827 return self.expression(exp.SessionParameter, this=this, kind=kind) -2828 -2829 def _parse_lambda(self) -> t.Optional[exp.Expression]: -2830 index = self._index -2831 -2832 if self._match(TokenType.L_PAREN): -2833 expressions = self._parse_csv(self._parse_id_var) -2834 -2835 if not self._match(TokenType.R_PAREN): -2836 self._retreat(index) -2837 else: -2838 expressions = [self._parse_id_var()] -2839 -2840 if self._match_set(self.LAMBDAS): -2841 return self.LAMBDAS[self._prev.token_type](self, expressions) -2842 -2843 self._retreat(index) +2814 this = function(args, params) +2815 else: +2816 this = function(args) +2817 +2818 self.validate_expression(this, args) +2819 else: +2820 this = self.expression(exp.Anonymous, this=this, expressions=args) +2821 +2822 self._match_r_paren(this) +2823 return self._parse_window(this) +2824 +2825 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: +2826 return self._parse_column_def(self._parse_id_var()) +2827 +2828 def _parse_user_defined_function( +2829 self, kind: t.Optional[TokenType] = None +2830 ) -> t.Optional[exp.Expression]: +2831 this = self._parse_id_var() +2832 +2833 while self._match(TokenType.DOT): +2834 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) +2835 +2836 if not self._match(TokenType.L_PAREN): +2837 return this +2838 +2839 expressions = self._parse_csv(self._parse_function_parameter) +2840 self._match_r_paren() +2841 return self.expression( +2842 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True +2843 ) 2844 -2845 this: t.Optional[exp.Expression] -2846 -2847 if self._match(TokenType.DISTINCT): -2848 this = self.expression( -2849 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) -2850 ) -2851 else: -2852 this = self._parse_select_or_expression() -2853 -2854 if self._match(TokenType.IGNORE_NULLS): -2855 this = self.expression(exp.IgnoreNulls, this=this) -2856 else: -2857 self._match(TokenType.RESPECT_NULLS) +2845 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: +2846 literal = self._parse_primary() +2847 if literal: +2848 return self.expression(exp.Introducer, this=token.text, expression=literal) +2849 +2850 return self.expression(exp.Identifier, this=token.text) +2851 +2852 def _parse_national(self, token: Token) -> exp.Expression: +2853 return self.expression(exp.National, this=exp.Literal.string(token.text)) +2854 +2855 def _parse_session_parameter(self) -> exp.Expression: +2856 kind = None +2857 this = self._parse_id_var() or self._parse_primary() 2858 -2859 return self._parse_limit(self._parse_order(this)) -2860 -2861 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: -2862 index = self._index -2863 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): -2864 self._retreat(index) -2865 return this -2866 -2867 args = self._parse_csv( -2868 lambda: self._parse_constraint() -2869 or self._parse_column_def(self._parse_field(any_token=True)) -2870 ) -2871 self._match_r_paren() -2872 return self.expression(exp.Schema, this=this, expressions=args) -2873 -2874 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -2875 kind = self._parse_types() -2876 -2877 if self._match_text_seq("FOR", "ORDINALITY"): -2878 return self.expression(exp.ColumnDef, this=this, ordinality=True) -2879 -2880 constraints = [] -2881 while True: -2882 constraint = self._parse_column_constraint() -2883 if not constraint: -2884 break -2885 constraints.append(constraint) -2886 -2887 if not kind and not constraints: -2888 return this +2859 if this and self._match(TokenType.DOT): +2860 kind = this.name +2861 this = self._parse_var() or self._parse_primary() +2862 +2863 return self.expression(exp.SessionParameter, this=this, kind=kind) +2864 +2865 def _parse_lambda(self) -> t.Optional[exp.Expression]: +2866 index = self._index +2867 +2868 if self._match(TokenType.L_PAREN): +2869 expressions = self._parse_csv(self._parse_id_var) +2870 +2871 if not self._match(TokenType.R_PAREN): +2872 self._retreat(index) +2873 else: +2874 expressions = [self._parse_id_var()] +2875 +2876 if self._match_set(self.LAMBDAS): +2877 return self.LAMBDAS[self._prev.token_type](self, expressions) +2878 +2879 self._retreat(index) +2880 +2881 this: t.Optional[exp.Expression] +2882 +2883 if self._match(TokenType.DISTINCT): +2884 this = self.expression( +2885 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) +2886 ) +2887 else: +2888 this = self._parse_select_or_expression() 2889 -2890 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) -2891 -2892 def _parse_auto_increment(self) -> exp.Expression: -2893 start = None -2894 increment = None -2895 -2896 if self._match(TokenType.L_PAREN, advance=False): -2897 args = self._parse_wrapped_csv(self._parse_bitwise) -2898 start = seq_get(args, 0) -2899 increment = seq_get(args, 1) -2900 elif self._match_text_seq("START"): -2901 start = self._parse_bitwise() -2902 self._match_text_seq("INCREMENT") -2903 increment = self._parse_bitwise() -2904 -2905 if start and increment: -2906 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) -2907 -2908 return exp.AutoIncrementColumnConstraint() +2890 if self._match(TokenType.IGNORE_NULLS): +2891 this = self.expression(exp.IgnoreNulls, this=this) +2892 else: +2893 self._match(TokenType.RESPECT_NULLS) +2894 +2895 return self._parse_limit(self._parse_order(this)) +2896 +2897 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: +2898 index = self._index +2899 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): +2900 self._retreat(index) +2901 return this +2902 +2903 args = self._parse_csv( +2904 lambda: self._parse_constraint() +2905 or self._parse_column_def(self._parse_field(any_token=True)) +2906 ) +2907 self._match_r_paren() +2908 return self.expression(exp.Schema, this=this, expressions=args) 2909 -2910 def _parse_compress(self) -> exp.Expression: -2911 if self._match(TokenType.L_PAREN, advance=False): -2912 return self.expression( -2913 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) -2914 ) +2910 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +2911 kind = self._parse_types() +2912 +2913 if self._match_text_seq("FOR", "ORDINALITY"): +2914 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2915 -2916 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) -2917 -2918 def _parse_generated_as_identity(self) -> exp.Expression: -2919 if self._match(TokenType.BY_DEFAULT): -2920 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) -2921 else: -2922 self._match_text_seq("ALWAYS") -2923 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) -2924 -2925 self._match_text_seq("AS", "IDENTITY") -2926 if self._match(TokenType.L_PAREN): -2927 if self._match_text_seq("START", "WITH"): -2928 this.set("start", self._parse_bitwise()) -2929 if self._match_text_seq("INCREMENT", "BY"): -2930 this.set("increment", self._parse_bitwise()) -2931 if self._match_text_seq("MINVALUE"): -2932 this.set("minvalue", self._parse_bitwise()) -2933 if self._match_text_seq("MAXVALUE"): -2934 this.set("maxvalue", self._parse_bitwise()) -2935 -2936 if self._match_text_seq("CYCLE"): -2937 this.set("cycle", True) -2938 elif self._match_text_seq("NO", "CYCLE"): -2939 this.set("cycle", False) +2916 constraints = [] +2917 while True: +2918 constraint = self._parse_column_constraint() +2919 if not constraint: +2920 break +2921 constraints.append(constraint) +2922 +2923 if not kind and not constraints: +2924 return this +2925 +2926 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) +2927 +2928 def _parse_auto_increment(self) -> exp.Expression: +2929 start = None +2930 increment = None +2931 +2932 if self._match(TokenType.L_PAREN, advance=False): +2933 args = self._parse_wrapped_csv(self._parse_bitwise) +2934 start = seq_get(args, 0) +2935 increment = seq_get(args, 1) +2936 elif self._match_text_seq("START"): +2937 start = self._parse_bitwise() +2938 self._match_text_seq("INCREMENT") +2939 increment = self._parse_bitwise() 2940 -2941 self._match_r_paren() -2942 -2943 return this -2944 -2945 def _parse_inline(self) -> t.Optional[exp.Expression]: -2946 self._match_text_seq("LENGTH") -2947 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) -2948 -2949 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: -2950 if self._match_text_seq("NULL"): -2951 return self.expression(exp.NotNullColumnConstraint) -2952 if self._match_text_seq("CASESPECIFIC"): -2953 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) -2954 return None -2955 -2956 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: -2957 this = self._parse_references() -2958 if this: -2959 return this +2941 if start and increment: +2942 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) +2943 +2944 return exp.AutoIncrementColumnConstraint() +2945 +2946 def _parse_compress(self) -> exp.Expression: +2947 if self._match(TokenType.L_PAREN, advance=False): +2948 return self.expression( +2949 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) +2950 ) +2951 +2952 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) +2953 +2954 def _parse_generated_as_identity(self) -> exp.Expression: +2955 if self._match(TokenType.BY_DEFAULT): +2956 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) +2957 else: +2958 self._match_text_seq("ALWAYS") +2959 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2960 -2961 if self._match(TokenType.CONSTRAINT): -2962 this = self._parse_id_var() -2963 -2964 if self._match_texts(self.CONSTRAINT_PARSERS): -2965 return self.expression( -2966 exp.ColumnConstraint, -2967 this=this, -2968 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), -2969 ) -2970 -2971 return this -2972 -2973 def _parse_constraint(self) -> t.Optional[exp.Expression]: -2974 if not self._match(TokenType.CONSTRAINT): -2975 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) +2961 self._match_text_seq("AS", "IDENTITY") +2962 if self._match(TokenType.L_PAREN): +2963 if self._match_text_seq("START", "WITH"): +2964 this.set("start", self._parse_bitwise()) +2965 if self._match_text_seq("INCREMENT", "BY"): +2966 this.set("increment", self._parse_bitwise()) +2967 if self._match_text_seq("MINVALUE"): +2968 this.set("minvalue", self._parse_bitwise()) +2969 if self._match_text_seq("MAXVALUE"): +2970 this.set("maxvalue", self._parse_bitwise()) +2971 +2972 if self._match_text_seq("CYCLE"): +2973 this.set("cycle", True) +2974 elif self._match_text_seq("NO", "CYCLE"): +2975 this.set("cycle", False) 2976 -2977 this = self._parse_id_var() -2978 expressions = [] -2979 -2980 while True: -2981 constraint = self._parse_unnamed_constraint() or self._parse_function() -2982 if not constraint: -2983 break -2984 expressions.append(constraint) -2985 -2986 return self.expression(exp.Constraint, this=this, expressions=expressions) -2987 -2988 def _parse_unnamed_constraint( -2989 self, constraints: t.Optional[t.Collection[str]] = None -2990 ) -> t.Optional[exp.Expression]: -2991 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): -2992 return None -2993 -2994 constraint = self._prev.text.upper() -2995 if constraint not in self.CONSTRAINT_PARSERS: -2996 self.raise_error(f"No parser found for schema constraint {constraint}.") -2997 -2998 return self.CONSTRAINT_PARSERS[constraint](self) +2977 self._match_r_paren() +2978 +2979 return this +2980 +2981 def _parse_inline(self) -> t.Optional[exp.Expression]: +2982 self._match_text_seq("LENGTH") +2983 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) +2984 +2985 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: +2986 if self._match_text_seq("NULL"): +2987 return self.expression(exp.NotNullColumnConstraint) +2988 if self._match_text_seq("CASESPECIFIC"): +2989 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) +2990 return None +2991 +2992 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: +2993 this = self._parse_references() +2994 if this: +2995 return this +2996 +2997 if self._match(TokenType.CONSTRAINT): +2998 this = self._parse_id_var() 2999 -3000 def _parse_unique(self) -> exp.Expression: -3001 if not self._match(TokenType.L_PAREN, advance=False): -3002 return self.expression(exp.UniqueColumnConstraint) -3003 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) -3004 -3005 def _parse_key_constraint_options(self) -> t.List[str]: -3006 options = [] -3007 while True: -3008 if not self._curr: -3009 break -3010 -3011 if self._match(TokenType.ON): -3012 action = None -3013 on = self._advance_any() and self._prev.text -3014 -3015 if self._match(TokenType.NO_ACTION): -3016 action = "NO ACTION" -3017 elif self._match(TokenType.CASCADE): -3018 action = "CASCADE" -3019 elif self._match_pair(TokenType.SET, TokenType.NULL): -3020 action = "SET NULL" -3021 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): -3022 action = "SET DEFAULT" -3023 else: -3024 self.raise_error("Invalid key constraint") -3025 -3026 options.append(f"ON {on} {action}") -3027 elif self._match_text_seq("NOT", "ENFORCED"): -3028 options.append("NOT ENFORCED") -3029 elif self._match_text_seq("DEFERRABLE"): -3030 options.append("DEFERRABLE") -3031 elif self._match_text_seq("INITIALLY", "DEFERRED"): -3032 options.append("INITIALLY DEFERRED") -3033 elif self._match_text_seq("NORELY"): -3034 options.append("NORELY") -3035 elif self._match_text_seq("MATCH", "FULL"): -3036 options.append("MATCH FULL") -3037 else: -3038 break -3039 -3040 return options -3041 -3042 def _parse_references(self) -> t.Optional[exp.Expression]: -3043 if not self._match(TokenType.REFERENCES): -3044 return None -3045 -3046 expressions = None -3047 this = self._parse_id_var() -3048 -3049 if self._match(TokenType.L_PAREN, advance=False): -3050 expressions = self._parse_wrapped_id_vars() -3051 -3052 options = self._parse_key_constraint_options() -3053 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) -3054 -3055 def _parse_foreign_key(self) -> exp.Expression: -3056 expressions = self._parse_wrapped_id_vars() -3057 reference = self._parse_references() -3058 options = {} -3059 -3060 while self._match(TokenType.ON): -3061 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): -3062 self.raise_error("Expected DELETE or UPDATE") -3063 -3064 kind = self._prev.text.lower() -3065 -3066 if self._match(TokenType.NO_ACTION): -3067 action = "NO ACTION" -3068 elif self._match(TokenType.SET): -3069 self._match_set((TokenType.NULL, TokenType.DEFAULT)) -3070 action = "SET " + self._prev.text.upper() -3071 else: -3072 self._advance() -3073 action = self._prev.text.upper() -3074 -3075 options[kind] = action -3076 -3077 return self.expression( -3078 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore -3079 ) -3080 -3081 def _parse_primary_key(self) -> exp.Expression: -3082 desc = ( -3083 self._match_set((TokenType.ASC, TokenType.DESC)) -3084 and self._prev.token_type == TokenType.DESC -3085 ) -3086 -3087 if not self._match(TokenType.L_PAREN, advance=False): -3088 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) -3089 -3090 expressions = self._parse_wrapped_id_vars() -3091 options = self._parse_key_constraint_options() -3092 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) -3093 -3094 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -3095 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): -3096 return this -3097 -3098 bracket_kind = self._prev.token_type -3099 expressions: t.List[t.Optional[exp.Expression]] -3100 -3101 if self._match(TokenType.COLON): -3102 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] -3103 else: -3104 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) -3105 -3106 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs -3107 if bracket_kind == TokenType.L_BRACE: -3108 this = self.expression(exp.Struct, expressions=expressions) -3109 elif not this or this.name.upper() == "ARRAY": -3110 this = self.expression(exp.Array, expressions=expressions) -3111 else: -3112 expressions = apply_index_offset(expressions, -self.index_offset) -3113 this = self.expression(exp.Bracket, this=this, expressions=expressions) -3114 -3115 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: -3116 self.raise_error("Expected ]") -3117 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: -3118 self.raise_error("Expected }") -3119 -3120 this.comments = self._prev_comments -3121 return self._parse_bracket(this) +3000 if self._match_texts(self.CONSTRAINT_PARSERS): +3001 return self.expression( +3002 exp.ColumnConstraint, +3003 this=this, +3004 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), +3005 ) +3006 +3007 return this +3008 +3009 def _parse_constraint(self) -> t.Optional[exp.Expression]: +3010 if not self._match(TokenType.CONSTRAINT): +3011 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) +3012 +3013 this = self._parse_id_var() +3014 expressions = [] +3015 +3016 while True: +3017 constraint = self._parse_unnamed_constraint() or self._parse_function() +3018 if not constraint: +3019 break +3020 expressions.append(constraint) +3021 +3022 return self.expression(exp.Constraint, this=this, expressions=expressions) +3023 +3024 def _parse_unnamed_constraint( +3025 self, constraints: t.Optional[t.Collection[str]] = None +3026 ) -> t.Optional[exp.Expression]: +3027 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): +3028 return None +3029 +3030 constraint = self._prev.text.upper() +3031 if constraint not in self.CONSTRAINT_PARSERS: +3032 self.raise_error(f"No parser found for schema constraint {constraint}.") +3033 +3034 return self.CONSTRAINT_PARSERS[constraint](self) +3035 +3036 def _parse_unique(self) -> exp.Expression: +3037 if not self._match(TokenType.L_PAREN, advance=False): +3038 return self.expression(exp.UniqueColumnConstraint) +3039 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) +3040 +3041 def _parse_key_constraint_options(self) -> t.List[str]: +3042 options = [] +3043 while True: +3044 if not self._curr: +3045 break +3046 +3047 if self._match(TokenType.ON): +3048 action = None +3049 on = self._advance_any() and self._prev.text +3050 +3051 if self._match(TokenType.NO_ACTION): +3052 action = "NO ACTION" +3053 elif self._match(TokenType.CASCADE): +3054 action = "CASCADE" +3055 elif self._match_pair(TokenType.SET, TokenType.NULL): +3056 action = "SET NULL" +3057 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): +3058 action = "SET DEFAULT" +3059 else: +3060 self.raise_error("Invalid key constraint") +3061 +3062 options.append(f"ON {on} {action}") +3063 elif self._match_text_seq("NOT", "ENFORCED"): +3064 options.append("NOT ENFORCED") +3065 elif self._match_text_seq("DEFERRABLE"): +3066 options.append("DEFERRABLE") +3067 elif self._match_text_seq("INITIALLY", "DEFERRED"): +3068 options.append("INITIALLY DEFERRED") +3069 elif self._match_text_seq("NORELY"): +3070 options.append("NORELY") +3071 elif self._match_text_seq("MATCH", "FULL"): +3072 options.append("MATCH FULL") +3073 else: +3074 break +3075 +3076 return options +3077 +3078 def _parse_references(self) -> t.Optional[exp.Expression]: +3079 if not self._match(TokenType.REFERENCES): +3080 return None +3081 +3082 expressions = None +3083 this = self._parse_id_var() +3084 +3085 if self._match(TokenType.L_PAREN, advance=False): +3086 expressions = self._parse_wrapped_id_vars() +3087 +3088 options = self._parse_key_constraint_options() +3089 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) +3090 +3091 def _parse_foreign_key(self) -> exp.Expression: +3092 expressions = self._parse_wrapped_id_vars() +3093 reference = self._parse_references() +3094 options = {} +3095 +3096 while self._match(TokenType.ON): +3097 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): +3098 self.raise_error("Expected DELETE or UPDATE") +3099 +3100 kind = self._prev.text.lower() +3101 +3102 if self._match(TokenType.NO_ACTION): +3103 action = "NO ACTION" +3104 elif self._match(TokenType.SET): +3105 self._match_set((TokenType.NULL, TokenType.DEFAULT)) +3106 action = "SET " + self._prev.text.upper() +3107 else: +3108 self._advance() +3109 action = self._prev.text.upper() +3110 +3111 options[kind] = action +3112 +3113 return self.expression( +3114 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore +3115 ) +3116 +3117 def _parse_primary_key(self) -> exp.Expression: +3118 desc = ( +3119 self._match_set((TokenType.ASC, TokenType.DESC)) +3120 and self._prev.token_type == TokenType.DESC +3121 ) 3122 -3123 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: -3124 if self._match(TokenType.COLON): -3125 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) -3126 return this -3127 -3128 def _parse_case(self) -> t.Optional[exp.Expression]: -3129 ifs = [] -3130 default = None -3131 -3132 expression = self._parse_conjunction() +3123 if not self._match(TokenType.L_PAREN, advance=False): +3124 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) +3125 +3126 expressions = self._parse_wrapped_id_vars() +3127 options = self._parse_key_constraint_options() +3128 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) +3129 +3130 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +3131 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): +3132 return this 3133 -3134 while self._match(TokenType.WHEN): -3135 this = self._parse_conjunction() -3136 self._match(TokenType.THEN) -3137 then = self._parse_conjunction() -3138 ifs.append(self.expression(exp.If, this=this, true=then)) -3139 -3140 if self._match(TokenType.ELSE): -3141 default = self._parse_conjunction() -3142 -3143 if not self._match(TokenType.END): -3144 self.raise_error("Expected END after CASE", self._prev) -3145 -3146 return self._parse_window( -3147 self.expression(exp.Case, this=expression, ifs=ifs, default=default) -3148 ) -3149 -3150 def _parse_if(self) -> t.Optional[exp.Expression]: -3151 if self._match(TokenType.L_PAREN): -3152 args = self._parse_csv(self._parse_conjunction) -3153 this = exp.If.from_arg_list(args) -3154 self.validate_expression(this, args) -3155 self._match_r_paren() -3156 else: -3157 condition = self._parse_conjunction() -3158 self._match(TokenType.THEN) -3159 true = self._parse_conjunction() -3160 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None -3161 self._match(TokenType.END) -3162 this = self.expression(exp.If, this=condition, true=true, false=false) +3134 bracket_kind = self._prev.token_type +3135 expressions: t.List[t.Optional[exp.Expression]] +3136 +3137 if self._match(TokenType.COLON): +3138 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] +3139 else: +3140 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) +3141 +3142 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs +3143 if bracket_kind == TokenType.L_BRACE: +3144 this = self.expression(exp.Struct, expressions=expressions) +3145 elif not this or this.name.upper() == "ARRAY": +3146 this = self.expression(exp.Array, expressions=expressions) +3147 else: +3148 expressions = apply_index_offset(expressions, -self.index_offset) +3149 this = self.expression(exp.Bracket, this=this, expressions=expressions) +3150 +3151 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: +3152 self.raise_error("Expected ]") +3153 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: +3154 self.raise_error("Expected }") +3155 +3156 this.comments = self._prev_comments +3157 return self._parse_bracket(this) +3158 +3159 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: +3160 if self._match(TokenType.COLON): +3161 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) +3162 return this 3163 -3164 return self._parse_window(this) -3165 -3166 def _parse_extract(self) -> exp.Expression: -3167 this = self._parse_function() or self._parse_var() or self._parse_type() -3168 -3169 if self._match(TokenType.FROM): -3170 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) -3171 -3172 if not self._match(TokenType.COMMA): -3173 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) -3174 -3175 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) -3176 -3177 def _parse_cast(self, strict: bool) -> exp.Expression: -3178 this = self._parse_conjunction() -3179 -3180 if not self._match(TokenType.ALIAS): -3181 self.raise_error("Expected AS after CAST") -3182 -3183 to = self._parse_types() -3184 -3185 if not to: -3186 self.raise_error("Expected TYPE after CAST") -3187 elif to.this == exp.DataType.Type.CHAR: -3188 if self._match(TokenType.CHARACTER_SET): -3189 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) -3190 -3191 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) -3192 -3193 def _parse_string_agg(self) -> exp.Expression: -3194 expression: t.Optional[exp.Expression] -3195 -3196 if self._match(TokenType.DISTINCT): -3197 args = self._parse_csv(self._parse_conjunction) -3198 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) -3199 else: -3200 args = self._parse_csv(self._parse_conjunction) -3201 expression = seq_get(args, 0) -3202 -3203 index = self._index -3204 if not self._match(TokenType.R_PAREN): -3205 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) -3206 order = self._parse_order(this=expression) -3207 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) -3208 -3209 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). -3210 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that -3211 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. -3212 if not self._match(TokenType.WITHIN_GROUP): -3213 self._retreat(index) -3214 this = exp.GroupConcat.from_arg_list(args) -3215 self.validate_expression(this, args) -3216 return this -3217 -3218 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) -3219 order = self._parse_order(this=expression) -3220 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) -3221 -3222 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: -3223 to: t.Optional[exp.Expression] -3224 this = self._parse_column() -3225 -3226 if self._match(TokenType.USING): -3227 to = self.expression(exp.CharacterSet, this=self._parse_var()) -3228 elif self._match(TokenType.COMMA): -3229 to = self._parse_types() -3230 else: -3231 to = None -3232 -3233 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) -3234 -3235 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: -3236 args = self._parse_csv(self._parse_bitwise) -3237 -3238 if self._match(TokenType.IN): -3239 return self.expression( -3240 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) -3241 ) -3242 -3243 if haystack_first: -3244 haystack = seq_get(args, 0) -3245 needle = seq_get(args, 1) -3246 else: -3247 needle = seq_get(args, 0) -3248 haystack = seq_get(args, 1) -3249 -3250 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) -3251 -3252 self.validate_expression(this, args) +3164 def _parse_case(self) -> t.Optional[exp.Expression]: +3165 ifs = [] +3166 default = None +3167 +3168 expression = self._parse_conjunction() +3169 +3170 while self._match(TokenType.WHEN): +3171 this = self._parse_conjunction() +3172 self._match(TokenType.THEN) +3173 then = self._parse_conjunction() +3174 ifs.append(self.expression(exp.If, this=this, true=then)) +3175 +3176 if self._match(TokenType.ELSE): +3177 default = self._parse_conjunction() +3178 +3179 if not self._match(TokenType.END): +3180 self.raise_error("Expected END after CASE", self._prev) +3181 +3182 return self._parse_window( +3183 self.expression(exp.Case, this=expression, ifs=ifs, default=default) +3184 ) +3185 +3186 def _parse_if(self) -> t.Optional[exp.Expression]: +3187 if self._match(TokenType.L_PAREN): +3188 args = self._parse_csv(self._parse_conjunction) +3189 this = exp.If.from_arg_list(args) +3190 self.validate_expression(this, args) +3191 self._match_r_paren() +3192 else: +3193 condition = self._parse_conjunction() +3194 self._match(TokenType.THEN) +3195 true = self._parse_conjunction() +3196 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None +3197 self._match(TokenType.END) +3198 this = self.expression(exp.If, this=condition, true=true, false=false) +3199 +3200 return self._parse_window(this) +3201 +3202 def _parse_extract(self) -> exp.Expression: +3203 this = self._parse_function() or self._parse_var() or self._parse_type() +3204 +3205 if self._match(TokenType.FROM): +3206 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) +3207 +3208 if not self._match(TokenType.COMMA): +3209 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) +3210 +3211 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) +3212 +3213 def _parse_cast(self, strict: bool) -> exp.Expression: +3214 this = self._parse_conjunction() +3215 +3216 if not self._match(TokenType.ALIAS): +3217 self.raise_error("Expected AS after CAST") +3218 +3219 to = self._parse_types() +3220 +3221 if not to: +3222 self.raise_error("Expected TYPE after CAST") +3223 elif to.this == exp.DataType.Type.CHAR: +3224 if self._match(TokenType.CHARACTER_SET): +3225 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) +3226 +3227 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) +3228 +3229 def _parse_string_agg(self) -> exp.Expression: +3230 expression: t.Optional[exp.Expression] +3231 +3232 if self._match(TokenType.DISTINCT): +3233 args = self._parse_csv(self._parse_conjunction) +3234 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) +3235 else: +3236 args = self._parse_csv(self._parse_conjunction) +3237 expression = seq_get(args, 0) +3238 +3239 index = self._index +3240 if not self._match(TokenType.R_PAREN): +3241 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) +3242 order = self._parse_order(this=expression) +3243 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) +3244 +3245 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). +3246 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that +3247 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. +3248 if not self._match(TokenType.WITHIN_GROUP): +3249 self._retreat(index) +3250 this = exp.GroupConcat.from_arg_list(args) +3251 self.validate_expression(this, args) +3252 return this 3253 -3254 return this -3255 -3256 def _parse_join_hint(self, func_name: str) -> exp.Expression: -3257 args = self._parse_csv(self._parse_table) -3258 return exp.JoinHint(this=func_name.upper(), expressions=args) -3259 -3260 def _parse_substring(self) -> exp.Expression: -3261 # Postgres supports the form: substring(string [from int] [for int]) -3262 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 -3263 -3264 args = self._parse_csv(self._parse_bitwise) -3265 -3266 if self._match(TokenType.FROM): -3267 args.append(self._parse_bitwise()) -3268 if self._match(TokenType.FOR): -3269 args.append(self._parse_bitwise()) +3254 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) +3255 order = self._parse_order(this=expression) +3256 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) +3257 +3258 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: +3259 to: t.Optional[exp.Expression] +3260 this = self._parse_column() +3261 +3262 if self._match(TokenType.USING): +3263 to = self.expression(exp.CharacterSet, this=self._parse_var()) +3264 elif self._match(TokenType.COMMA): +3265 to = self._parse_types() +3266 else: +3267 to = None +3268 +3269 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3270 -3271 this = exp.Substring.from_arg_list(args) -3272 self.validate_expression(this, args) +3271 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: +3272 args = self._parse_csv(self._parse_bitwise) 3273 -3274 return this -3275 -3276 def _parse_trim(self) -> exp.Expression: -3277 # https://www.w3resource.com/sql/character-functions/trim.php -3278 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html -3279 -3280 position = None -3281 collation = None -3282 -3283 if self._match_set(self.TRIM_TYPES): -3284 position = self._prev.text.upper() +3274 if self._match(TokenType.IN): +3275 return self.expression( +3276 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) +3277 ) +3278 +3279 if haystack_first: +3280 haystack = seq_get(args, 0) +3281 needle = seq_get(args, 1) +3282 else: +3283 needle = seq_get(args, 0) +3284 haystack = seq_get(args, 1) 3285 -3286 expression = self._parse_term() -3287 if self._match_set((TokenType.FROM, TokenType.COMMA)): -3288 this = self._parse_term() -3289 else: -3290 this = expression -3291 expression = None -3292 -3293 if self._match(TokenType.COLLATE): -3294 collation = self._parse_term() +3286 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) +3287 +3288 self.validate_expression(this, args) +3289 +3290 return this +3291 +3292 def _parse_join_hint(self, func_name: str) -> exp.Expression: +3293 args = self._parse_csv(self._parse_table) +3294 return exp.JoinHint(this=func_name.upper(), expressions=args) 3295 -3296 return self.expression( -3297 exp.Trim, -3298 this=this, -3299 position=position, -3300 expression=expression, -3301 collation=collation, -3302 ) -3303 -3304 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3305 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) +3296 def _parse_substring(self) -> exp.Expression: +3297 # Postgres supports the form: substring(string [from int] [for int]) +3298 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 +3299 +3300 args = self._parse_csv(self._parse_bitwise) +3301 +3302 if self._match(TokenType.FROM): +3303 args.append(self._parse_bitwise()) +3304 if self._match(TokenType.FOR): +3305 args.append(self._parse_bitwise()) 3306 -3307 def _parse_named_window(self) -> t.Optional[exp.Expression]: -3308 return self._parse_window(self._parse_id_var(), alias=True) +3307 this = exp.Substring.from_arg_list(args) +3308 self.validate_expression(this, args) 3309 -3310 def _parse_window( -3311 self, this: t.Optional[exp.Expression], alias: bool = False -3312 ) -> t.Optional[exp.Expression]: -3313 if self._match(TokenType.FILTER): -3314 where = self._parse_wrapped(self._parse_where) -3315 this = self.expression(exp.Filter, this=this, expression=where) -3316 -3317 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. -3318 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 -3319 if self._match(TokenType.WITHIN_GROUP): -3320 order = self._parse_wrapped(self._parse_order) -3321 this = self.expression(exp.WithinGroup, this=this, expression=order) -3322 -3323 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER -3324 # Some dialects choose to implement and some do not. -3325 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html -3326 -3327 # There is some code above in _parse_lambda that handles -3328 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... -3329 -3330 # The below changes handle -3331 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... -3332 -3333 # Oracle allows both formats -3334 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) -3335 # and Snowflake chose to do the same for familiarity -3336 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes -3337 if self._match(TokenType.IGNORE_NULLS): -3338 this = self.expression(exp.IgnoreNulls, this=this) -3339 elif self._match(TokenType.RESPECT_NULLS): -3340 this = self.expression(exp.RespectNulls, this=this) -3341 -3342 # bigquery select from window x AS (partition by ...) -3343 if alias: -3344 self._match(TokenType.ALIAS) -3345 elif not self._match(TokenType.OVER): -3346 return this -3347 -3348 if not self._match(TokenType.L_PAREN): -3349 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) -3350 -3351 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) -3352 partition = self._parse_partition_by() -3353 order = self._parse_order() -3354 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text -3355 -3356 if kind: -3357 self._match(TokenType.BETWEEN) -3358 start = self._parse_window_spec() -3359 self._match(TokenType.AND) -3360 end = self._parse_window_spec() -3361 -3362 spec = self.expression( -3363 exp.WindowSpec, -3364 kind=kind, -3365 start=start["value"], -3366 start_side=start["side"], -3367 end=end["value"], -3368 end_side=end["side"], -3369 ) -3370 else: -3371 spec = None -3372 -3373 self._match_r_paren() -3374 -3375 return self.expression( -3376 exp.Window, -3377 this=this, -3378 partition_by=partition, -3379 order=order, -3380 spec=spec, -3381 alias=window_alias, -3382 ) +3310 return this +3311 +3312 def _parse_trim(self) -> exp.Expression: +3313 # https://www.w3resource.com/sql/character-functions/trim.php +3314 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html +3315 +3316 position = None +3317 collation = None +3318 +3319 if self._match_set(self.TRIM_TYPES): +3320 position = self._prev.text.upper() +3321 +3322 expression = self._parse_term() +3323 if self._match_set((TokenType.FROM, TokenType.COMMA)): +3324 this = self._parse_term() +3325 else: +3326 this = expression +3327 expression = None +3328 +3329 if self._match(TokenType.COLLATE): +3330 collation = self._parse_term() +3331 +3332 return self.expression( +3333 exp.Trim, +3334 this=this, +3335 position=position, +3336 expression=expression, +3337 collation=collation, +3338 ) +3339 +3340 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3341 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) +3342 +3343 def _parse_named_window(self) -> t.Optional[exp.Expression]: +3344 return self._parse_window(self._parse_id_var(), alias=True) +3345 +3346 def _parse_window( +3347 self, this: t.Optional[exp.Expression], alias: bool = False +3348 ) -> t.Optional[exp.Expression]: +3349 if self._match(TokenType.FILTER): +3350 where = self._parse_wrapped(self._parse_where) +3351 this = self.expression(exp.Filter, this=this, expression=where) +3352 +3353 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. +3354 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 +3355 if self._match(TokenType.WITHIN_GROUP): +3356 order = self._parse_wrapped(self._parse_order) +3357 this = self.expression(exp.WithinGroup, this=this, expression=order) +3358 +3359 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER +3360 # Some dialects choose to implement and some do not. +3361 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html +3362 +3363 # There is some code above in _parse_lambda that handles +3364 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... +3365 +3366 # The below changes handle +3367 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... +3368 +3369 # Oracle allows both formats +3370 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) +3371 # and Snowflake chose to do the same for familiarity +3372 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes +3373 if self._match(TokenType.IGNORE_NULLS): +3374 this = self.expression(exp.IgnoreNulls, this=this) +3375 elif self._match(TokenType.RESPECT_NULLS): +3376 this = self.expression(exp.RespectNulls, this=this) +3377 +3378 # bigquery select from window x AS (partition by ...) +3379 if alias: +3380 self._match(TokenType.ALIAS) +3381 elif not self._match(TokenType.OVER): +3382 return this 3383 -3384 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: -3385 self._match(TokenType.BETWEEN) +3384 if not self._match(TokenType.L_PAREN): +3385 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3386 -3387 return { -3388 "value": ( -3389 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text -3390 ) -3391 or self._parse_bitwise(), -3392 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, -3393 } -3394 -3395 def _parse_alias( -3396 self, this: t.Optional[exp.Expression], explicit: bool = False -3397 ) -> t.Optional[exp.Expression]: -3398 any_token = self._match(TokenType.ALIAS) -3399 -3400 if explicit and not any_token: -3401 return this -3402 -3403 if self._match(TokenType.L_PAREN): -3404 aliases = self.expression( -3405 exp.Aliases, -3406 this=this, -3407 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), -3408 ) -3409 self._match_r_paren(aliases) -3410 return aliases -3411 -3412 alias = self._parse_id_var(any_token) -3413 -3414 if alias: -3415 return self.expression(exp.Alias, this=this, alias=alias) -3416 -3417 return this -3418 -3419 def _parse_id_var( -3420 self, -3421 any_token: bool = True, -3422 tokens: t.Optional[t.Collection[TokenType]] = None, -3423 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, -3424 ) -> t.Optional[exp.Expression]: -3425 identifier = self._parse_identifier() -3426 -3427 if identifier: -3428 return identifier -3429 -3430 prefix = "" -3431 -3432 if prefix_tokens: -3433 while self._match_set(prefix_tokens): -3434 prefix += self._prev.text +3387 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) +3388 partition = self._parse_partition_by() +3389 order = self._parse_order() +3390 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text +3391 +3392 if kind: +3393 self._match(TokenType.BETWEEN) +3394 start = self._parse_window_spec() +3395 self._match(TokenType.AND) +3396 end = self._parse_window_spec() +3397 +3398 spec = self.expression( +3399 exp.WindowSpec, +3400 kind=kind, +3401 start=start["value"], +3402 start_side=start["side"], +3403 end=end["value"], +3404 end_side=end["side"], +3405 ) +3406 else: +3407 spec = None +3408 +3409 self._match_r_paren() +3410 +3411 return self.expression( +3412 exp.Window, +3413 this=this, +3414 partition_by=partition, +3415 order=order, +3416 spec=spec, +3417 alias=window_alias, +3418 ) +3419 +3420 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: +3421 self._match(TokenType.BETWEEN) +3422 +3423 return { +3424 "value": ( +3425 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text +3426 ) +3427 or self._parse_bitwise(), +3428 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, +3429 } +3430 +3431 def _parse_alias( +3432 self, this: t.Optional[exp.Expression], explicit: bool = False +3433 ) -> t.Optional[exp.Expression]: +3434 any_token = self._match(TokenType.ALIAS) 3435 -3436 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): -3437 quoted = self._prev.token_type == TokenType.STRING -3438 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) -3439 -3440 return None -3441 -3442 def _parse_string(self) -> t.Optional[exp.Expression]: -3443 if self._match(TokenType.STRING): -3444 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) -3445 return self._parse_placeholder() -3446 -3447 def _parse_number(self) -> t.Optional[exp.Expression]: -3448 if self._match(TokenType.NUMBER): -3449 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) -3450 return self._parse_placeholder() -3451 -3452 def _parse_identifier(self) -> t.Optional[exp.Expression]: -3453 if self._match(TokenType.IDENTIFIER): -3454 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) -3455 return self._parse_placeholder() -3456 -3457 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: -3458 if (any_token and self._advance_any()) or self._match(TokenType.VAR): -3459 return self.expression(exp.Var, this=self._prev.text) -3460 return self._parse_placeholder() -3461 -3462 def _advance_any(self) -> t.Optional[Token]: -3463 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: -3464 self._advance() -3465 return self._prev -3466 return None +3436 if explicit and not any_token: +3437 return this +3438 +3439 if self._match(TokenType.L_PAREN): +3440 aliases = self.expression( +3441 exp.Aliases, +3442 this=this, +3443 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), +3444 ) +3445 self._match_r_paren(aliases) +3446 return aliases +3447 +3448 alias = self._parse_id_var(any_token) +3449 +3450 if alias: +3451 return self.expression(exp.Alias, this=this, alias=alias) +3452 +3453 return this +3454 +3455 def _parse_id_var( +3456 self, +3457 any_token: bool = True, +3458 tokens: t.Optional[t.Collection[TokenType]] = None, +3459 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, +3460 ) -> t.Optional[exp.Expression]: +3461 identifier = self._parse_identifier() +3462 +3463 if identifier: +3464 return identifier +3465 +3466 prefix = "" 3467 -3468 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: -3469 return self._parse_var() or self._parse_string() -3470 -3471 def _parse_null(self) -> t.Optional[exp.Expression]: -3472 if self._match(TokenType.NULL): -3473 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) -3474 return None +3468 if prefix_tokens: +3469 while self._match_set(prefix_tokens): +3470 prefix += self._prev.text +3471 +3472 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): +3473 quoted = self._prev.token_type == TokenType.STRING +3474 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3475 -3476 def _parse_boolean(self) -> t.Optional[exp.Expression]: -3477 if self._match(TokenType.TRUE): -3478 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) -3479 if self._match(TokenType.FALSE): -3480 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) -3481 return None +3476 return None +3477 +3478 def _parse_string(self) -> t.Optional[exp.Expression]: +3479 if self._match(TokenType.STRING): +3480 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) +3481 return self._parse_placeholder() 3482 -3483 def _parse_star(self) -> t.Optional[exp.Expression]: -3484 if self._match(TokenType.STAR): -3485 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) -3486 return None +3483 def _parse_number(self) -> t.Optional[exp.Expression]: +3484 if self._match(TokenType.NUMBER): +3485 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) +3486 return self._parse_placeholder() 3487 -3488 def _parse_parameter(self) -> exp.Expression: -3489 wrapped = self._match(TokenType.L_BRACE) -3490 this = self._parse_var() or self._parse_primary() -3491 self._match(TokenType.R_BRACE) -3492 return self.expression(exp.Parameter, this=this, wrapped=wrapped) -3493 -3494 def _parse_placeholder(self) -> t.Optional[exp.Expression]: -3495 if self._match_set(self.PLACEHOLDER_PARSERS): -3496 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) -3497 if placeholder: -3498 return placeholder -3499 self._advance(-1) -3500 return None -3501 -3502 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3503 if not self._match(TokenType.EXCEPT): -3504 return None -3505 if self._match(TokenType.L_PAREN, advance=False): -3506 return self._parse_wrapped_csv(self._parse_column) -3507 return self._parse_csv(self._parse_column) -3508 -3509 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: -3510 if not self._match(TokenType.REPLACE): -3511 return None -3512 if self._match(TokenType.L_PAREN, advance=False): -3513 return self._parse_wrapped_csv(self._parse_expression) -3514 return self._parse_csv(self._parse_expression) -3515 -3516 def _parse_csv( -3517 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA -3518 ) -> t.List[t.Optional[exp.Expression]]: -3519 parse_result = parse_method() -3520 items = [parse_result] if parse_result is not None else [] -3521 -3522 while self._match(sep): -3523 if parse_result and self._prev_comments: -3524 parse_result.comments = self._prev_comments -3525 -3526 parse_result = parse_method() -3527 if parse_result is not None: -3528 items.append(parse_result) +3488 def _parse_identifier(self) -> t.Optional[exp.Expression]: +3489 if self._match(TokenType.IDENTIFIER): +3490 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) +3491 return self._parse_placeholder() +3492 +3493 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: +3494 if (any_token and self._advance_any()) or self._match(TokenType.VAR): +3495 return self.expression(exp.Var, this=self._prev.text) +3496 return self._parse_placeholder() +3497 +3498 def _advance_any(self) -> t.Optional[Token]: +3499 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: +3500 self._advance() +3501 return self._prev +3502 return None +3503 +3504 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: +3505 return self._parse_var() or self._parse_string() +3506 +3507 def _parse_null(self) -> t.Optional[exp.Expression]: +3508 if self._match(TokenType.NULL): +3509 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) +3510 return None +3511 +3512 def _parse_boolean(self) -> t.Optional[exp.Expression]: +3513 if self._match(TokenType.TRUE): +3514 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) +3515 if self._match(TokenType.FALSE): +3516 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) +3517 return None +3518 +3519 def _parse_star(self) -> t.Optional[exp.Expression]: +3520 if self._match(TokenType.STAR): +3521 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) +3522 return None +3523 +3524 def _parse_parameter(self) -> exp.Expression: +3525 wrapped = self._match(TokenType.L_BRACE) +3526 this = self._parse_var() or self._parse_primary() +3527 self._match(TokenType.R_BRACE) +3528 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3529 -3530 return items -3531 -3532 def _parse_tokens( -3533 self, parse_method: t.Callable, expressions: t.Dict -3534 ) -> t.Optional[exp.Expression]: -3535 this = parse_method() -3536 -3537 while self._match_set(expressions): -3538 this = self.expression( -3539 expressions[self._prev.token_type], -3540 this=this, -3541 comments=self._prev_comments, -3542 expression=parse_method(), -3543 ) +3530 def _parse_placeholder(self) -> t.Optional[exp.Expression]: +3531 if self._match_set(self.PLACEHOLDER_PARSERS): +3532 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) +3533 if placeholder: +3534 return placeholder +3535 self._advance(-1) +3536 return None +3537 +3538 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3539 if not self._match(TokenType.EXCEPT): +3540 return None +3541 if self._match(TokenType.L_PAREN, advance=False): +3542 return self._parse_wrapped_csv(self._parse_column) +3543 return self._parse_csv(self._parse_column) 3544 -3545 return this -3546 -3547 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: -3548 return self._parse_wrapped_csv(self._parse_id_var) -3549 -3550 def _parse_wrapped_csv( -3551 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA -3552 ) -> t.List[t.Optional[exp.Expression]]: -3553 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) -3554 -3555 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: -3556 self._match_l_paren() -3557 parse_result = parse_method() -3558 self._match_r_paren() -3559 return parse_result -3560 -3561 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: -3562 return self._parse_select() or self._parse_expression() -3563 -3564 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: -3565 return self._parse_set_operations( -3566 self._parse_select(nested=True, parse_subquery_alias=False) -3567 ) -3568 -3569 def _parse_transaction(self) -> exp.Expression: -3570 this = None -3571 if self._match_texts(self.TRANSACTION_KIND): -3572 this = self._prev.text -3573 -3574 self._match_texts({"TRANSACTION", "WORK"}) -3575 -3576 modes = [] -3577 while True: -3578 mode = [] -3579 while self._match(TokenType.VAR): -3580 mode.append(self._prev.text) -3581 -3582 if mode: -3583 modes.append(" ".join(mode)) -3584 if not self._match(TokenType.COMMA): -3585 break -3586 -3587 return self.expression(exp.Transaction, this=this, modes=modes) -3588 -3589 def _parse_commit_or_rollback(self) -> exp.Expression: -3590 chain = None -3591 savepoint = None -3592 is_rollback = self._prev.token_type == TokenType.ROLLBACK -3593 -3594 self._match_texts({"TRANSACTION", "WORK"}) -3595 -3596 if self._match_text_seq("TO"): -3597 self._match_text_seq("SAVEPOINT") -3598 savepoint = self._parse_id_var() +3545 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: +3546 if not self._match(TokenType.REPLACE): +3547 return None +3548 if self._match(TokenType.L_PAREN, advance=False): +3549 return self._parse_wrapped_csv(self._parse_expression) +3550 return self._parse_csv(self._parse_expression) +3551 +3552 def _parse_csv( +3553 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA +3554 ) -> t.List[t.Optional[exp.Expression]]: +3555 parse_result = parse_method() +3556 items = [parse_result] if parse_result is not None else [] +3557 +3558 while self._match(sep): +3559 if parse_result and self._prev_comments: +3560 parse_result.comments = self._prev_comments +3561 +3562 parse_result = parse_method() +3563 if parse_result is not None: +3564 items.append(parse_result) +3565 +3566 return items +3567 +3568 def _parse_tokens( +3569 self, parse_method: t.Callable, expressions: t.Dict +3570 ) -> t.Optional[exp.Expression]: +3571 this = parse_method() +3572 +3573 while self._match_set(expressions): +3574 this = self.expression( +3575 expressions[self._prev.token_type], +3576 this=this, +3577 comments=self._prev_comments, +3578 expression=parse_method(), +3579 ) +3580 +3581 return this +3582 +3583 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: +3584 return self._parse_wrapped_csv(self._parse_id_var) +3585 +3586 def _parse_wrapped_csv( +3587 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA +3588 ) -> t.List[t.Optional[exp.Expression]]: +3589 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) +3590 +3591 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: +3592 self._match_l_paren() +3593 parse_result = parse_method() +3594 self._match_r_paren() +3595 return parse_result +3596 +3597 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: +3598 return self._parse_select() or self._parse_expression() 3599 -3600 if self._match(TokenType.AND): -3601 chain = not self._match_text_seq("NO") -3602 self._match_text_seq("CHAIN") -3603 -3604 if is_rollback: -3605 return self.expression(exp.Rollback, savepoint=savepoint) -3606 return self.expression(exp.Commit, chain=chain) -3607 -3608 def _parse_add_column(self) -> t.Optional[exp.Expression]: -3609 if not self._match_text_seq("ADD"): -3610 return None +3600 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: +3601 return self._parse_set_operations( +3602 self._parse_select(nested=True, parse_subquery_alias=False) +3603 ) +3604 +3605 def _parse_transaction(self) -> exp.Expression: +3606 this = None +3607 if self._match_texts(self.TRANSACTION_KIND): +3608 this = self._prev.text +3609 +3610 self._match_texts({"TRANSACTION", "WORK"}) 3611 -3612 self._match(TokenType.COLUMN) -3613 exists_column = self._parse_exists(not_=True) -3614 expression = self._parse_column_def(self._parse_field(any_token=True)) -3615 -3616 if expression: -3617 expression.set("exists", exists_column) -3618 -3619 return expression -3620 -3621 def _parse_drop_column(self) -> t.Optional[exp.Expression]: -3622 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") -3623 -3624 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html -3625 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: -3626 return self.expression( -3627 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists -3628 ) +3612 modes = [] +3613 while True: +3614 mode = [] +3615 while self._match(TokenType.VAR): +3616 mode.append(self._prev.text) +3617 +3618 if mode: +3619 modes.append(" ".join(mode)) +3620 if not self._match(TokenType.COMMA): +3621 break +3622 +3623 return self.expression(exp.Transaction, this=this, modes=modes) +3624 +3625 def _parse_commit_or_rollback(self) -> exp.Expression: +3626 chain = None +3627 savepoint = None +3628 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3629 -3630 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: -3631 this = None -3632 kind = self._prev.token_type -3633 -3634 if kind == TokenType.CONSTRAINT: -3635 this = self._parse_id_var() -3636 -3637 if self._match_text_seq("CHECK"): -3638 expression = self._parse_wrapped(self._parse_conjunction) -3639 enforced = self._match_text_seq("ENFORCED") -3640 -3641 return self.expression( -3642 exp.AddConstraint, this=this, expression=expression, enforced=enforced -3643 ) -3644 -3645 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): -3646 expression = self._parse_foreign_key() -3647 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): -3648 expression = self._parse_primary_key() -3649 -3650 return self.expression(exp.AddConstraint, this=this, expression=expression) +3630 self._match_texts({"TRANSACTION", "WORK"}) +3631 +3632 if self._match_text_seq("TO"): +3633 self._match_text_seq("SAVEPOINT") +3634 savepoint = self._parse_id_var() +3635 +3636 if self._match(TokenType.AND): +3637 chain = not self._match_text_seq("NO") +3638 self._match_text_seq("CHAIN") +3639 +3640 if is_rollback: +3641 return self.expression(exp.Rollback, savepoint=savepoint) +3642 return self.expression(exp.Commit, chain=chain) +3643 +3644 def _parse_add_column(self) -> t.Optional[exp.Expression]: +3645 if not self._match_text_seq("ADD"): +3646 return None +3647 +3648 self._match(TokenType.COLUMN) +3649 exists_column = self._parse_exists(not_=True) +3650 expression = self._parse_column_def(self._parse_field(any_token=True)) 3651 -3652 def _parse_alter(self) -> t.Optional[exp.Expression]: -3653 if not self._match(TokenType.TABLE): -3654 return self._parse_as_command(self._prev) -3655 -3656 exists = self._parse_exists() -3657 this = self._parse_table(schema=True) -3658 -3659 actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None -3660 -3661 index = self._index -3662 if self._match(TokenType.DELETE): -3663 actions = [self.expression(exp.Delete, where=self._parse_where())] -3664 elif self._match_text_seq("ADD"): -3665 if self._match_set(self.ADD_CONSTRAINT_TOKENS): -3666 actions = self._parse_csv(self._parse_add_constraint) -3667 else: -3668 self._retreat(index) -3669 actions = self._parse_csv(self._parse_add_column) -3670 elif self._match_text_seq("DROP"): -3671 partition_exists = self._parse_exists() +3652 if expression: +3653 expression.set("exists", exists_column) +3654 +3655 return expression +3656 +3657 def _parse_drop_column(self) -> t.Optional[exp.Expression]: +3658 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") +3659 +3660 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html +3661 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: +3662 return self.expression( +3663 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists +3664 ) +3665 +3666 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: +3667 this = None +3668 kind = self._prev.token_type +3669 +3670 if kind == TokenType.CONSTRAINT: +3671 this = self._parse_id_var() 3672 -3673 if self._match(TokenType.PARTITION, advance=False): -3674 actions = self._parse_csv( -3675 lambda: self._parse_drop_partition(exists=partition_exists) -3676 ) -3677 else: -3678 self._retreat(index) -3679 actions = self._parse_csv(self._parse_drop_column) -3680 elif self._match_text_seq("RENAME", "TO"): -3681 actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) -3682 elif self._match_text_seq("ALTER"): -3683 self._match(TokenType.COLUMN) -3684 column = self._parse_field(any_token=True) +3673 if self._match_text_seq("CHECK"): +3674 expression = self._parse_wrapped(self._parse_conjunction) +3675 enforced = self._match_text_seq("ENFORCED") +3676 +3677 return self.expression( +3678 exp.AddConstraint, this=this, expression=expression, enforced=enforced +3679 ) +3680 +3681 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): +3682 expression = self._parse_foreign_key() +3683 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): +3684 expression = self._parse_primary_key() 3685 -3686 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): -3687 actions = self.expression(exp.AlterColumn, this=column, drop=True) -3688 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): -3689 actions = self.expression( -3690 exp.AlterColumn, this=column, default=self._parse_conjunction() -3691 ) -3692 else: -3693 self._match_text_seq("SET", "DATA") -3694 actions = self.expression( -3695 exp.AlterColumn, -3696 this=column, -3697 dtype=self._match_text_seq("TYPE") and self._parse_types(), -3698 collate=self._match(TokenType.COLLATE) and self._parse_term(), -3699 using=self._match(TokenType.USING) and self._parse_conjunction(), -3700 ) -3701 -3702 actions = ensure_list(actions) -3703 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) -3704 -3705 def _parse_show(self) -> t.Optional[exp.Expression]: -3706 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore -3707 if parser: -3708 return parser(self) -3709 self._advance() -3710 return self.expression(exp.Show, this=self._prev.text.upper()) -3711 -3712 def _default_parse_set_item(self) -> exp.Expression: -3713 return self.expression( -3714 exp.SetItem, -3715 this=self._parse_statement(), -3716 ) +3686 return self.expression(exp.AddConstraint, this=this, expression=expression) +3687 +3688 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: +3689 index = self._index - 1 +3690 +3691 if self._match_set(self.ADD_CONSTRAINT_TOKENS): +3692 return self._parse_csv(self._parse_add_constraint) +3693 +3694 self._retreat(index) +3695 return self._parse_csv(self._parse_add_column) +3696 +3697 def _parse_alter_table_alter(self) -> exp.Expression: +3698 self._match(TokenType.COLUMN) +3699 column = self._parse_field(any_token=True) +3700 +3701 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): +3702 return self.expression(exp.AlterColumn, this=column, drop=True) +3703 if self._match_pair(TokenType.SET, TokenType.DEFAULT): +3704 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) +3705 +3706 self._match_text_seq("SET", "DATA") +3707 return self.expression( +3708 exp.AlterColumn, +3709 this=column, +3710 dtype=self._match_text_seq("TYPE") and self._parse_types(), +3711 collate=self._match(TokenType.COLLATE) and self._parse_term(), +3712 using=self._match(TokenType.USING) and self._parse_conjunction(), +3713 ) +3714 +3715 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: +3716 index = self._index - 1 3717 -3718 def _parse_set_item(self) -> t.Optional[exp.Expression]: -3719 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore -3720 return parser(self) if parser else self._default_parse_set_item() +3718 partition_exists = self._parse_exists() +3719 if self._match(TokenType.PARTITION, advance=False): +3720 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3721 -3722 def _parse_merge(self) -> exp.Expression: -3723 self._match(TokenType.INTO) -3724 target = self._parse_table() -3725 -3726 self._match(TokenType.USING) -3727 using = self._parse_table() +3722 self._retreat(index) +3723 return self._parse_csv(self._parse_drop_column) +3724 +3725 def _parse_alter_table_rename(self) -> exp.Expression: +3726 self._match_text_seq("TO") +3727 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3728 -3729 self._match(TokenType.ON) -3730 on = self._parse_conjunction() -3731 -3732 whens = [] -3733 while self._match(TokenType.WHEN): -3734 this = self._parse_conjunction() -3735 self._match(TokenType.THEN) -3736 -3737 if self._match(TokenType.INSERT): -3738 _this = self._parse_star() -3739 if _this: -3740 then = self.expression(exp.Insert, this=_this) -3741 else: -3742 then = self.expression( -3743 exp.Insert, -3744 this=self._parse_value(), -3745 expression=self._match(TokenType.VALUES) and self._parse_value(), -3746 ) -3747 elif self._match(TokenType.UPDATE): -3748 expressions = self._parse_star() -3749 if expressions: -3750 then = self.expression(exp.Update, expressions=expressions) -3751 else: -3752 then = self.expression( -3753 exp.Update, -3754 expressions=self._match(TokenType.SET) -3755 and self._parse_csv(self._parse_equality), -3756 ) -3757 elif self._match(TokenType.DELETE): -3758 then = self.expression(exp.Var, this=self._prev.text) -3759 -3760 whens.append(self.expression(exp.When, this=this, then=then)) -3761 -3762 return self.expression( -3763 exp.Merge, -3764 this=target, -3765 using=using, -3766 on=on, -3767 expressions=whens, -3768 ) -3769 -3770 def _parse_set(self) -> exp.Expression: -3771 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) -3772 -3773 def _parse_as_command(self, start: Token) -> exp.Command: -3774 while self._curr: -3775 self._advance() -3776 return exp.Command(this=self._find_sql(start, self._prev)) -3777 -3778 def _find_parser( -3779 self, parsers: t.Dict[str, t.Callable], trie: t.Dict -3780 ) -> t.Optional[t.Callable]: -3781 index = self._index -3782 this = [] -3783 while True: -3784 # The current token might be multiple words -3785 curr = self._curr.text.upper() -3786 key = curr.split(" ") -3787 this.append(curr) -3788 self._advance() -3789 result, trie = in_trie(trie, key) -3790 if result == 0: -3791 break -3792 if result == 2: -3793 subparser = parsers[" ".join(this)] -3794 return subparser -3795 self._retreat(index) -3796 return None -3797 -3798 def _match(self, token_type, advance=True): -3799 if not self._curr: -3800 return None -3801 -3802 if self._curr.token_type == token_type: -3803 if advance: -3804 self._advance() -3805 return True -3806 -3807 return None +3729 def _parse_alter(self) -> t.Optional[exp.Expression]: +3730 if not self._match(TokenType.TABLE): +3731 return self._parse_as_command(self._prev) +3732 +3733 exists = self._parse_exists() +3734 this = self._parse_table(schema=True) +3735 +3736 if not self._curr: +3737 return None +3738 +3739 parser = self.ALTER_PARSERS.get(self._curr.text.upper()) +3740 actions = ensure_list(self._advance() or parser(self)) if parser else [] # type: ignore +3741 +3742 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) +3743 +3744 def _parse_show(self) -> t.Optional[exp.Expression]: +3745 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore +3746 if parser: +3747 return parser(self) +3748 self._advance() +3749 return self.expression(exp.Show, this=self._prev.text.upper()) +3750 +3751 def _default_parse_set_item(self) -> exp.Expression: +3752 return self.expression( +3753 exp.SetItem, +3754 this=self._parse_statement(), +3755 ) +3756 +3757 def _parse_set_item(self) -> t.Optional[exp.Expression]: +3758 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore +3759 return parser(self) if parser else self._default_parse_set_item() +3760 +3761 def _parse_merge(self) -> exp.Expression: +3762 self._match(TokenType.INTO) +3763 target = self._parse_table() +3764 +3765 self._match(TokenType.USING) +3766 using = self._parse_table() +3767 +3768 self._match(TokenType.ON) +3769 on = self._parse_conjunction() +3770 +3771 whens = [] +3772 while self._match(TokenType.WHEN): +3773 this = self._parse_conjunction() +3774 self._match(TokenType.THEN) +3775 +3776 if self._match(TokenType.INSERT): +3777 _this = self._parse_star() +3778 if _this: +3779 then = self.expression(exp.Insert, this=_this) +3780 else: +3781 then = self.expression( +3782 exp.Insert, +3783 this=self._parse_value(), +3784 expression=self._match(TokenType.VALUES) and self._parse_value(), +3785 ) +3786 elif self._match(TokenType.UPDATE): +3787 expressions = self._parse_star() +3788 if expressions: +3789 then = self.expression(exp.Update, expressions=expressions) +3790 else: +3791 then = self.expression( +3792 exp.Update, +3793 expressions=self._match(TokenType.SET) +3794 and self._parse_csv(self._parse_equality), +3795 ) +3796 elif self._match(TokenType.DELETE): +3797 then = self.expression(exp.Var, this=self._prev.text) +3798 +3799 whens.append(self.expression(exp.When, this=this, then=then)) +3800 +3801 return self.expression( +3802 exp.Merge, +3803 this=target, +3804 using=using, +3805 on=on, +3806 expressions=whens, +3807 ) 3808 -3809 def _match_set(self, types, advance=True): -3810 if not self._curr: -3811 return None -3812 -3813 if self._curr.token_type in types: -3814 if advance: -3815 self._advance() -3816 return True -3817 -3818 return None -3819 -3820 def _match_pair(self, token_type_a, token_type_b, advance=True): -3821 if not self._curr or not self._next: -3822 return None -3823 -3824 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: -3825 if advance: -3826 self._advance(2) -3827 return True -3828 -3829 return None -3830 -3831 def _match_l_paren(self, expression=None): -3832 if not self._match(TokenType.L_PAREN): -3833 self.raise_error("Expecting (") -3834 if expression and self._prev_comments: -3835 expression.comments = self._prev_comments -3836 -3837 def _match_r_paren(self, expression=None): -3838 if not self._match(TokenType.R_PAREN): -3839 self.raise_error("Expecting )") -3840 if expression and self._prev_comments: -3841 expression.comments = self._prev_comments +3809 def _parse_set(self) -> exp.Expression: +3810 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) +3811 +3812 def _parse_as_command(self, start: Token) -> exp.Command: +3813 while self._curr: +3814 self._advance() +3815 text = self._find_sql(start, self._prev) +3816 size = len(start.text) +3817 return exp.Command(this=text[:size], expression=text[size:]) +3818 +3819 def _find_parser( +3820 self, parsers: t.Dict[str, t.Callable], trie: t.Dict +3821 ) -> t.Optional[t.Callable]: +3822 index = self._index +3823 this = [] +3824 while True: +3825 # The current token might be multiple words +3826 curr = self._curr.text.upper() +3827 key = curr.split(" ") +3828 this.append(curr) +3829 self._advance() +3830 result, trie = in_trie(trie, key) +3831 if result == 0: +3832 break +3833 if result == 2: +3834 subparser = parsers[" ".join(this)] +3835 return subparser +3836 self._retreat(index) +3837 return None +3838 +3839 def _match(self, token_type, advance=True): +3840 if not self._curr: +3841 return None 3842 -3843 def _match_texts(self, texts, advance=True): -3844 if self._curr and self._curr.text.upper() in texts: -3845 if advance: -3846 self._advance() -3847 return True -3848 return False +3843 if self._curr.token_type == token_type: +3844 if advance: +3845 self._advance() +3846 return True +3847 +3848 return None 3849 -3850 def _match_text_seq(self, *texts, advance=True): -3851 index = self._index -3852 for text in texts: -3853 if self._curr and self._curr.text.upper() == text: -3854 self._advance() -3855 else: -3856 self._retreat(index) -3857 return False +3850 def _match_set(self, types, advance=True): +3851 if not self._curr: +3852 return None +3853 +3854 if self._curr.token_type in types: +3855 if advance: +3856 self._advance() +3857 return True 3858 -3859 if not advance: -3860 self._retreat(index) -3861 -3862 return True -3863 -3864 def _replace_columns_with_dots(self, this): -3865 if isinstance(this, exp.Dot): -3866 exp.replace_children(this, self._replace_columns_with_dots) -3867 elif isinstance(this, exp.Column): -3868 exp.replace_children(this, self._replace_columns_with_dots) -3869 table = this.args.get("table") -3870 this = ( -3871 self.expression(exp.Dot, this=table, expression=this.this) -3872 if table -3873 else self.expression(exp.Var, this=this.name) -3874 ) -3875 elif isinstance(this, exp.Identifier): -3876 this = self.expression(exp.Var, this=this.name) -3877 return this -3878 -3879 def _replace_lambda(self, node, lambda_variables): -3880 if isinstance(node, exp.Column): -3881 if node.name in lambda_variables: -3882 return node.this -3883 return node +3859 return None +3860 +3861 def _match_pair(self, token_type_a, token_type_b, advance=True): +3862 if not self._curr or not self._next: +3863 return None +3864 +3865 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: +3866 if advance: +3867 self._advance(2) +3868 return True +3869 +3870 return None +3871 +3872 def _match_l_paren(self, expression=None): +3873 if not self._match(TokenType.L_PAREN): +3874 self.raise_error("Expecting (") +3875 if expression and self._prev_comments: +3876 expression.comments = self._prev_comments +3877 +3878 def _match_r_paren(self, expression=None): +3879 if not self._match(TokenType.R_PAREN): +3880 self.raise_error("Expecting )") +3881 if expression and self._prev_comments: +3882 expression.comments = self._prev_comments +3883 +3884 def _match_texts(self, texts, advance=True): +3885 if self._curr and self._curr.text.upper() in texts: +3886 if advance: +3887 self._advance() +3888 return True +3889 return False +3890 +3891 def _match_text_seq(self, *texts, advance=True): +3892 index = self._index +3893 for text in texts: +3894 if self._curr and self._curr.text.upper() == text: +3895 self._advance() +3896 else: +3897 self._retreat(index) +3898 return False +3899 +3900 if not advance: +3901 self._retreat(index) +3902 +3903 return True +3904 +3905 def _replace_columns_with_dots(self, this): +3906 if isinstance(this, exp.Dot): +3907 exp.replace_children(this, self._replace_columns_with_dots) +3908 elif isinstance(this, exp.Column): +3909 exp.replace_children(this, self._replace_columns_with_dots) +3910 table = this.args.get("table") +3911 this = ( +3912 self.expression(exp.Dot, this=table, expression=this.this) +3913 if table +3914 else self.expression(exp.Var, this=this.name) +3915 ) +3916 elif isinstance(this, exp.Identifier): +3917 this = self.expression(exp.Var, this=this.name) +3918 return this +3919 +3920 def _replace_lambda(self, node, lambda_variables): +3921 if isinstance(node, exp.Column): +3922 if node.name in lambda_variables: +3923 return node.this +3924 return node @@ -7889,24 +7971,24 @@ Default: "nulls_are_small" -
713    def __init__(
-714        self,
-715        error_level: t.Optional[ErrorLevel] = None,
-716        error_message_context: int = 100,
-717        index_offset: int = 0,
-718        unnest_column_only: bool = False,
-719        alias_post_tablesample: bool = False,
-720        max_errors: int = 3,
-721        null_ordering: t.Optional[str] = None,
-722    ):
-723        self.error_level = error_level or ErrorLevel.IMMEDIATE
-724        self.error_message_context = error_message_context
-725        self.index_offset = index_offset
-726        self.unnest_column_only = unnest_column_only
-727        self.alias_post_tablesample = alias_post_tablesample
-728        self.max_errors = max_errors
-729        self.null_ordering = null_ordering
-730        self.reset()
+            
725    def __init__(
+726        self,
+727        error_level: t.Optional[ErrorLevel] = None,
+728        error_message_context: int = 100,
+729        index_offset: int = 0,
+730        unnest_column_only: bool = False,
+731        alias_post_tablesample: bool = False,
+732        max_errors: int = 3,
+733        null_ordering: t.Optional[str] = None,
+734    ):
+735        self.error_level = error_level or ErrorLevel.IMMEDIATE
+736        self.error_message_context = error_message_context
+737        self.index_offset = index_offset
+738        self.unnest_column_only = unnest_column_only
+739        self.alias_post_tablesample = alias_post_tablesample
+740        self.max_errors = max_errors
+741        self.null_ordering = null_ordering
+742        self.reset()
 
@@ -7924,15 +8006,15 @@ Default: "nulls_are_small"
-
732    def reset(self):
-733        self.sql = ""
-734        self.errors = []
-735        self._tokens = []
-736        self._index = 0
-737        self._curr = None
-738        self._next = None
-739        self._prev = None
-740        self._prev_comments = None
+            
744    def reset(self):
+745        self.sql = ""
+746        self.errors = []
+747        self._tokens = []
+748        self._index = 0
+749        self._curr = None
+750        self._next = None
+751        self._prev = None
+752        self._prev_comments = None
 
@@ -7950,23 +8032,23 @@ Default: "nulls_are_small"
-
742    def parse(
-743        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
-744    ) -> t.List[t.Optional[exp.Expression]]:
-745        """
-746        Parses a list of tokens and returns a list of syntax trees, one tree
-747        per parsed SQL statement.
-748
-749        Args:
-750            raw_tokens: the list of tokens.
-751            sql: the original SQL string, used to produce helpful debug messages.
-752
-753        Returns:
-754            The list of syntax trees.
-755        """
-756        return self._parse(
-757            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
-758        )
+            
754    def parse(
+755        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
+756    ) -> t.List[t.Optional[exp.Expression]]:
+757        """
+758        Parses a list of tokens and returns a list of syntax trees, one tree
+759        per parsed SQL statement.
+760
+761        Args:
+762            raw_tokens: the list of tokens.
+763            sql: the original SQL string, used to produce helpful debug messages.
+764
+765        Returns:
+766            The list of syntax trees.
+767        """
+768        return self._parse(
+769            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
+770        )
 
@@ -8000,39 +8082,39 @@ per parsed SQL statement.

-
760    def parse_into(
-761        self,
-762        expression_types: exp.IntoType,
-763        raw_tokens: t.List[Token],
-764        sql: t.Optional[str] = None,
-765    ) -> t.List[t.Optional[exp.Expression]]:
-766        """
-767        Parses a list of tokens into a given Expression type. If a collection of Expression
-768        types is given instead, this method will try to parse the token list into each one
-769        of them, stopping at the first for which the parsing succeeds.
-770
-771        Args:
-772            expression_types: the expression type(s) to try and parse the token list into.
-773            raw_tokens: the list of tokens.
-774            sql: the original SQL string, used to produce helpful debug messages.
-775
-776        Returns:
-777            The target Expression.
-778        """
-779        errors = []
-780        for expression_type in ensure_collection(expression_types):
-781            parser = self.EXPRESSION_PARSERS.get(expression_type)
-782            if not parser:
-783                raise TypeError(f"No parser registered for {expression_type}")
-784            try:
-785                return self._parse(parser, raw_tokens, sql)
-786            except ParseError as e:
-787                e.errors[0]["into_expression"] = expression_type
-788                errors.append(e)
-789        raise ParseError(
-790            f"Failed to parse into {expression_types}",
-791            errors=merge_errors(errors),
-792        ) from errors[-1]
+            
772    def parse_into(
+773        self,
+774        expression_types: exp.IntoType,
+775        raw_tokens: t.List[Token],
+776        sql: t.Optional[str] = None,
+777    ) -> t.List[t.Optional[exp.Expression]]:
+778        """
+779        Parses a list of tokens into a given Expression type. If a collection of Expression
+780        types is given instead, this method will try to parse the token list into each one
+781        of them, stopping at the first for which the parsing succeeds.
+782
+783        Args:
+784            expression_types: the expression type(s) to try and parse the token list into.
+785            raw_tokens: the list of tokens.
+786            sql: the original SQL string, used to produce helpful debug messages.
+787
+788        Returns:
+789            The target Expression.
+790        """
+791        errors = []
+792        for expression_type in ensure_collection(expression_types):
+793            parser = self.EXPRESSION_PARSERS.get(expression_type)
+794            if not parser:
+795                raise TypeError(f"No parser registered for {expression_type}")
+796            try:
+797                return self._parse(parser, raw_tokens, sql)
+798            except ParseError as e:
+799                e.errors[0]["into_expression"] = expression_type
+800                errors.append(e)
+801        raise ParseError(
+802            f"Failed to parse into {expression_types}",
+803            errors=merge_errors(errors),
+804        ) from errors[-1]
 
@@ -8068,18 +8150,18 @@ of them, stopping at the first for which the parsing succeeds.

-
828    def check_errors(self) -> None:
-829        """
-830        Logs or raises any found errors, depending on the chosen error level setting.
-831        """
-832        if self.error_level == ErrorLevel.WARN:
-833            for error in self.errors:
-834                logger.error(str(error))
-835        elif self.error_level == ErrorLevel.RAISE and self.errors:
-836            raise ParseError(
-837                concat_messages(self.errors, self.max_errors),
-838                errors=merge_errors(self.errors),
-839            )
+            
840    def check_errors(self) -> None:
+841        """
+842        Logs or raises any found errors, depending on the chosen error level setting.
+843        """
+844        if self.error_level == ErrorLevel.WARN:
+845            for error in self.errors:
+846                logger.error(str(error))
+847        elif self.error_level == ErrorLevel.RAISE and self.errors:
+848            raise ParseError(
+849                concat_messages(self.errors, self.max_errors),
+850                errors=merge_errors(self.errors),
+851            )
 
@@ -8099,33 +8181,33 @@ of them, stopping at the first for which the parsing succeeds.

-
841    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
-842        """
-843        Appends an error in the list of recorded errors or raises it, depending on the chosen
-844        error level setting.
-845        """
-846        token = token or self._curr or self._prev or Token.string("")
-847        start = self._find_token(token)
-848        end = start + len(token.text)
-849        start_context = self.sql[max(start - self.error_message_context, 0) : start]
-850        highlight = self.sql[start:end]
-851        end_context = self.sql[end : end + self.error_message_context]
-852
-853        error = ParseError.new(
-854            f"{message}. Line {token.line}, Col: {token.col}.\n"
-855            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
-856            description=message,
-857            line=token.line,
-858            col=token.col,
-859            start_context=start_context,
-860            highlight=highlight,
-861            end_context=end_context,
-862        )
-863
-864        if self.error_level == ErrorLevel.IMMEDIATE:
-865            raise error
-866
-867        self.errors.append(error)
+            
853    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
+854        """
+855        Appends an error in the list of recorded errors or raises it, depending on the chosen
+856        error level setting.
+857        """
+858        token = token or self._curr or self._prev or Token.string("")
+859        start = self._find_token(token)
+860        end = start + len(token.text)
+861        start_context = self.sql[max(start - self.error_message_context, 0) : start]
+862        highlight = self.sql[start:end]
+863        end_context = self.sql[end : end + self.error_message_context]
+864
+865        error = ParseError.new(
+866            f"{message}. Line {token.line}, Col: {token.col}.\n"
+867            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
+868            description=message,
+869            line=token.line,
+870            col=token.col,
+871            start_context=start_context,
+872            highlight=highlight,
+873            end_context=end_context,
+874        )
+875
+876        if self.error_level == ErrorLevel.IMMEDIATE:
+877            raise error
+878
+879        self.errors.append(error)
 
@@ -8146,28 +8228,28 @@ error level setting.

-
869    def expression(
-870        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
-871    ) -> exp.Expression:
-872        """
-873        Creates a new, validated Expression.
-874
-875        Args:
-876            exp_class: the expression class to instantiate.
-877            comments: an optional list of comments to attach to the expression.
-878            kwargs: the arguments to set for the expression along with their respective values.
-879
-880        Returns:
-881            The target expression.
-882        """
-883        instance = exp_class(**kwargs)
-884        if self._prev_comments:
-885            instance.comments = self._prev_comments
-886            self._prev_comments = None
-887        if comments:
-888            instance.comments = comments
-889        self.validate_expression(instance)
-890        return instance
+            
881    def expression(
+882        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
+883    ) -> exp.Expression:
+884        """
+885        Creates a new, validated Expression.
+886
+887        Args:
+888            exp_class: the expression class to instantiate.
+889            comments: an optional list of comments to attach to the expression.
+890            kwargs: the arguments to set for the expression along with their respective values.
+891
+892        Returns:
+893            The target expression.
+894        """
+895        instance = exp_class(**kwargs)
+896        if self._prev_comments:
+897            instance.comments = self._prev_comments
+898            self._prev_comments = None
+899        if comments:
+900            instance.comments = comments
+901        self.validate_expression(instance)
+902        return instance
 
@@ -8201,22 +8283,22 @@ error level setting.

-
892    def validate_expression(
-893        self, expression: exp.Expression, args: t.Optional[t.List] = None
-894    ) -> None:
-895        """
-896        Validates an already instantiated expression, making sure that all its mandatory arguments
-897        are set.
-898
-899        Args:
-900            expression: the expression to validate.
-901            args: an optional list of items that was used to instantiate the expression, if it's a Func.
-902        """
-903        if self.error_level == ErrorLevel.IGNORE:
-904            return
-905
-906        for error_message in expression.error_messages(args):
-907            self.raise_error(error_message)
+            
904    def validate_expression(
+905        self, expression: exp.Expression, args: t.Optional[t.List] = None
+906    ) -> None:
+907        """
+908        Validates an already instantiated expression, making sure that all its mandatory arguments
+909        are set.
+910
+911        Args:
+912            expression: the expression to validate.
+913            args: an optional list of items that was used to instantiate the expression, if it's a Func.
+914        """
+915        if self.error_level == ErrorLevel.IGNORE:
+916            return
+917
+918        for error_message in expression.error_messages(args):
+919            self.raise_error(error_message)
 
-- cgit v1.2.3