diff options
Diffstat (limited to '')
-rw-r--r-- | sqlglot/dialects/clickhouse.py | 3 | ||||
-rw-r--r-- | sqlglot/dialects/dialect.py | 16 | ||||
-rw-r--r-- | sqlglot/dialects/duckdb.py | 1 | ||||
-rw-r--r-- | sqlglot/dialects/mysql.py | 15 | ||||
-rw-r--r-- | sqlglot/dialects/oracle.py | 45 | ||||
-rw-r--r-- | sqlglot/dialects/redshift.py | 2 | ||||
-rw-r--r-- | sqlglot/expressions.py | 67 | ||||
-rw-r--r-- | sqlglot/generator.py | 56 | ||||
-rw-r--r-- | sqlglot/optimizer/merge_subqueries.py | 2 | ||||
-rw-r--r-- | sqlglot/optimizer/qualify_columns.py | 2 | ||||
-rw-r--r-- | sqlglot/parser.py | 77 | ||||
-rw-r--r-- | sqlglot/tokens.py | 15 |
12 files changed, 262 insertions, 39 deletions
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index a38a239..4b36663 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -313,6 +313,8 @@ class ClickHouse(Dialect): exp.DataType.Type.LONGTEXT: "String", exp.DataType.Type.MEDIUMBLOB: "String", exp.DataType.Type.MEDIUMTEXT: "String", + exp.DataType.Type.TINYBLOB: "String", + exp.DataType.Type.TINYTEXT: "String", exp.DataType.Type.TEXT: "String", exp.DataType.Type.VARBINARY: "String", exp.DataType.Type.VARCHAR: "String", @@ -331,6 +333,7 @@ class ClickHouse(Dialect): exp.DataType.Type.FIXEDSTRING: "FixedString", exp.DataType.Type.FLOAT: "Float32", exp.DataType.Type.INT: "Int32", + exp.DataType.Type.MEDIUMINT: "Int32", exp.DataType.Type.INT128: "Int128", exp.DataType.Type.INT256: "Int256", exp.DataType.Type.LOWCARDINALITY: "LowCardinality", diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 1bfbfef..ff22547 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -2,6 +2,7 @@ from __future__ import annotations import typing as t from enum import Enum +from functools import reduce from sqlglot import exp from sqlglot._typing import E @@ -656,11 +657,18 @@ def ts_or_ds_to_date_sql(dialect: str) -> t.Callable: def concat_to_dpipe_sql(self: Generator, expression: exp.Concat | exp.SafeConcat) -> str: expression = expression.copy() - this, *rest_args = expression.expressions - for arg in rest_args: - this = exp.DPipe(this=this, expression=arg) + return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions)) - return self.sql(this) + +def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str: + expression = expression.copy() + delim, *rest_args = expression.expressions + return self.sql( + reduce( + lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)), + rest_args, + ) + ) def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str: diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index 684e35e..bf657ed 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -291,6 +291,7 @@ class DuckDB(Dialect): exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: rename_func("TO_TIMESTAMP"), exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", + exp.VariancePop: rename_func("VAR_POP"), exp.WeekOfYear: rename_func("WEEKOFYEAR"), } diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py index 6327796..46e3f19 100644 --- a/sqlglot/dialects/mysql.py +++ b/sqlglot/dialects/mysql.py @@ -119,7 +119,7 @@ class MySQL(Dialect): QUOTES = ["'", '"'] COMMENTS = ["--", "#", ("/*", "*/")] IDENTIFIERS = ["`"] - STRING_ESCAPES = ["'", "\\"] + STRING_ESCAPES = ["'", '"', "\\"] BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")] HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")] @@ -132,6 +132,8 @@ class MySQL(Dialect): "LONGBLOB": TokenType.LONGBLOB, "LONGTEXT": TokenType.LONGTEXT, "MEDIUMBLOB": TokenType.MEDIUMBLOB, + "TINYBLOB": TokenType.TINYBLOB, + "TINYTEXT": TokenType.TINYTEXT, "MEDIUMTEXT": TokenType.MEDIUMTEXT, "MEDIUMINT": TokenType.MEDIUMINT, "MEMBER OF": TokenType.MEMBER_OF, @@ -356,6 +358,15 @@ class MySQL(Dialect): LOG_DEFAULTS_TO_LN = True + def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: + this = self._parse_id_var() + if not self._match(TokenType.L_PAREN): + return this + + expression = self._parse_number() + self._match_r_paren() + return self.expression(exp.ColumnPrefix, this=this, expression=expression) + def _parse_index_constraint( self, kind: t.Optional[str] = None ) -> exp.IndexColumnConstraint: @@ -577,8 +588,10 @@ class MySQL(Dialect): TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT) TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT) + TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT) TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB) TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB) + TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB) PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py index 279ed31..378df49 100644 --- a/sqlglot/dialects/oracle.py +++ b/sqlglot/dialects/oracle.py @@ -7,6 +7,9 @@ from sqlglot.dialects.dialect import Dialect, no_ilike_sql, rename_func, trim_sq from sqlglot.helper import seq_get from sqlglot.tokens import TokenType +if t.TYPE_CHECKING: + from sqlglot._typing import E + def _parse_xml_table(self: Oracle.Parser) -> exp.XMLTable: this = self._parse_string() @@ -69,6 +72,16 @@ class Oracle(Dialect): FUNCTION_PARSERS: t.Dict[str, t.Callable] = { **parser.Parser.FUNCTION_PARSERS, + "JSON_ARRAY": lambda self: self._parse_json_array( + exp.JSONArray, + expressions=self._parse_csv(lambda: self._parse_format_json(self._parse_bitwise())), + ), + "JSON_ARRAYAGG": lambda self: self._parse_json_array( + exp.JSONArrayAgg, + this=self._parse_format_json(self._parse_bitwise()), + order=self._parse_order(), + ), + "JSON_TABLE": lambda self: self._parse_json_table(), "XMLTABLE": _parse_xml_table, } @@ -82,6 +95,38 @@ class Oracle(Dialect): # Reference: https://stackoverflow.com/a/336455 DISTINCT_TOKENS = {TokenType.DISTINCT, TokenType.UNIQUE} + # Note: this is currently incomplete; it only implements the "JSON_value_column" part + def _parse_json_column_def(self) -> exp.JSONColumnDef: + this = self._parse_id_var() + kind = self._parse_types(allow_identifiers=False) + path = self._match_text_seq("PATH") and self._parse_string() + return self.expression(exp.JSONColumnDef, this=this, kind=kind, path=path) + + def _parse_json_table(self) -> exp.JSONTable: + this = self._parse_format_json(self._parse_bitwise()) + path = self._match(TokenType.COMMA) and self._parse_string() + error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") + empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") + self._match(TokenType.COLUMN) + expressions = self._parse_wrapped_csv(self._parse_json_column_def, optional=True) + + return exp.JSONTable( + this=this, + expressions=expressions, + path=path, + error_handling=error_handling, + empty_handling=empty_handling, + ) + + def _parse_json_array(self, expr_type: t.Type[E], **kwargs) -> E: + return self.expression( + expr_type, + null_handling=self._parse_on_handling("NULL", "NULL", "ABSENT"), + return_type=self._match_text_seq("RETURNING") and self._parse_type(), + strict=self._match_text_seq("STRICT"), + **kwargs, + ) + def _parse_column(self) -> t.Optional[exp.Expression]: column = super()._parse_column() if column: diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index 351c5df..554cbd3 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -5,6 +5,7 @@ import typing as t from sqlglot import exp, transforms from sqlglot.dialects.dialect import ( concat_to_dpipe_sql, + concat_ws_to_dpipe_sql, rename_func, ts_or_ds_to_date_sql, ) @@ -123,6 +124,7 @@ class Redshift(Postgres): TRANSFORMS = { **Postgres.Generator.TRANSFORMS, exp.Concat: concat_to_dpipe_sql, + exp.ConcatWs: concat_ws_to_dpipe_sql, exp.CurrentTimestamp: lambda self, e: "SYSDATE", exp.DateAdd: lambda self, e: self.func( "DATEADD", exp.var(e.text("unit") or "day"), e.expression, e.this diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 877e9fd..98afddc 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -20,6 +20,7 @@ import typing as t from collections import deque from copy import deepcopy from enum import auto +from functools import reduce from sqlglot._typing import E from sqlglot.errors import ParseError @@ -1170,7 +1171,7 @@ class Column(Condition): parts.append(parent.expression) parent = parent.parent - return Dot.build(parts) + return Dot.build(deepcopy(parts)) class ColumnPosition(Expression): @@ -1537,6 +1538,10 @@ class ForeignKey(Expression): } +class ColumnPrefix(Expression): + arg_types = {"this": True, "expression": True} + + class PrimaryKey(Expression): arg_types = {"expressions": True, "options": False} @@ -3529,6 +3534,8 @@ class DataType(Expression): STRUCT = auto() SUPER = auto() TEXT = auto() + TINYBLOB = auto() + TINYTEXT = auto() TIME = auto() TIMETZ = auto() TIMESTAMP = auto() @@ -3793,13 +3800,7 @@ class Dot(Binary): if len(expressions) < 2: raise ValueError(f"Dot requires >= 2 expressions.") - a, b, *expressions = expressions - dot = Dot(this=a, expression=b) - - for expression in expressions: - dot = Dot(this=dot, expression=expression) - - return dot + return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions)) class DPipe(Binary): @@ -3959,6 +3960,13 @@ class Between(Predicate): class Bracket(Condition): arg_types = {"this": True, "expressions": True} + @property + def output_name(self) -> str: + if len(self.expressions) == 1: + return self.expressions[0].output_name + + return super().output_name + class SafeBracket(Bracket): """Represents array lookup where OOB index yields NULL instead of causing a failure.""" @@ -4477,6 +4485,10 @@ class IsNan(Func): _sql_names = ["IS_NAN", "ISNAN"] +class FormatJson(Expression): + pass + + class JSONKeyValue(Expression): arg_types = {"this": True, "expression": True} @@ -4487,11 +4499,48 @@ class JSONObject(Func): "null_handling": False, "unique_keys": False, "return_type": False, - "format_json": False, "encoding": False, } +# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAY.html +class JSONArray(Func): + arg_types = { + "expressions": True, + "null_handling": False, + "return_type": False, + "strict": False, + } + + +# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAYAGG.html +class JSONArrayAgg(Func): + arg_types = { + "this": True, + "order": False, + "null_handling": False, + "return_type": False, + "strict": False, + } + + +# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html +# Note: parsing of JSON column definitions is currently incomplete. +class JSONColumnDef(Expression): + arg_types = {"this": True, "kind": False, "path": False} + + +# # https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html +class JSONTable(Func): + arg_types = { + "this": True, + "expressions": True, + "path": False, + "error_handling": False, + "empty_handling": False, + } + + class OpenJSONColumnDef(Expression): arg_types = {"this": True, "kind": True, "path": False, "as_json": False} diff --git a/sqlglot/generator.py b/sqlglot/generator.py index 1074e9a..399b48b 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -193,8 +193,10 @@ class Generator: exp.DataType.Type.NVARCHAR: "VARCHAR", exp.DataType.Type.MEDIUMTEXT: "TEXT", exp.DataType.Type.LONGTEXT: "TEXT", + exp.DataType.Type.TINYTEXT: "TEXT", exp.DataType.Type.MEDIUMBLOB: "BLOB", exp.DataType.Type.LONGBLOB: "BLOB", + exp.DataType.Type.TINYBLOB: "BLOB", exp.DataType.Type.INET: "INET", } @@ -2021,6 +2023,9 @@ class Generator: def jsonkeyvalue_sql(self, expression: exp.JSONKeyValue) -> str: return f"{self.sql(expression, 'this')}: {self.sql(expression, 'expression')}" + def formatjson_sql(self, expression: exp.FormatJson) -> str: + return f"{self.sql(expression, 'this')} FORMAT JSON" + def jsonobject_sql(self, expression: exp.JSONObject) -> str: null_handling = expression.args.get("null_handling") null_handling = f" {null_handling}" if null_handling else "" @@ -2031,13 +2036,57 @@ class Generator: unique_keys = "" return_type = self.sql(expression, "return_type") return_type = f" RETURNING {return_type}" if return_type else "" - format_json = " FORMAT JSON" if expression.args.get("format_json") else "" encoding = self.sql(expression, "encoding") encoding = f" ENCODING {encoding}" if encoding else "" return self.func( "JSON_OBJECT", *expression.expressions, - suffix=f"{null_handling}{unique_keys}{return_type}{format_json}{encoding})", + suffix=f"{null_handling}{unique_keys}{return_type}{encoding})", + ) + + def jsonarray_sql(self, expression: exp.JSONArray) -> str: + null_handling = expression.args.get("null_handling") + null_handling = f" {null_handling}" if null_handling else "" + return_type = self.sql(expression, "return_type") + return_type = f" RETURNING {return_type}" if return_type else "" + strict = " STRICT" if expression.args.get("strict") else "" + return self.func( + "JSON_ARRAY", *expression.expressions, suffix=f"{null_handling}{return_type}{strict})" + ) + + def jsonarrayagg_sql(self, expression: exp.JSONArrayAgg) -> str: + this = self.sql(expression, "this") + order = self.sql(expression, "order") + null_handling = expression.args.get("null_handling") + null_handling = f" {null_handling}" if null_handling else "" + return_type = self.sql(expression, "return_type") + return_type = f" RETURNING {return_type}" if return_type else "" + strict = " STRICT" if expression.args.get("strict") else "" + return self.func( + "JSON_ARRAYAGG", + this, + suffix=f"{order}{null_handling}{return_type}{strict})", + ) + + def jsoncolumndef_sql(self, expression: exp.JSONColumnDef) -> str: + this = self.sql(expression, "this") + kind = self.sql(expression, "kind") + kind = f" {kind}" if kind else "" + path = self.sql(expression, "path") + path = f" PATH {path}" if path else "" + return f"{this}{kind}{path}" + + def jsontable_sql(self, expression: exp.JSONTable) -> str: + this = self.sql(expression, "this") + path = self.sql(expression, "path") + path = f", {path}" if path else "" + error_handling = expression.args.get("error_handling") + error_handling = f" {error_handling}" if error_handling else "" + empty_handling = expression.args.get("empty_handling") + empty_handling = f" {empty_handling}" if empty_handling else "" + columns = f" COLUMNS ({self.expressions(expression, skip_first=True)})" + return self.func( + "JSON_TABLE", this, suffix=f"{path}{error_handling}{empty_handling}{columns})" ) def openjsoncolumndef_sql(self, expression: exp.OpenJSONColumnDef) -> str: @@ -2722,6 +2771,9 @@ class Generator: condition = f" IF {condition}" if condition else "" return f"{this} FOR {expr} IN {iterator}{condition}" + def columnprefix_sql(self, expression: exp.ColumnPrefix) -> str: + return f"{self.sql(expression, 'this')}({self.sql(expression, 'expression')})" + def cached_generator( cache: t.Optional[t.Dict[int, str]] = None diff --git a/sqlglot/optimizer/merge_subqueries.py b/sqlglot/optimizer/merge_subqueries.py index 7322424..976c9ad 100644 --- a/sqlglot/optimizer/merge_subqueries.py +++ b/sqlglot/optimizer/merge_subqueries.py @@ -128,7 +128,7 @@ def _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join): def _is_a_window_expression_in_unmergable_operation(): window_expressions = inner_select.find_all(exp.Window) window_alias_names = {window.parent.alias_or_name for window in window_expressions} - inner_select_name = inner_select.parent.alias_or_name + inner_select_name = from_or_join.alias_or_name unmergable_window_columns = [ column for column in outer_scope.columns diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 952999d..b06ea1d 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -129,7 +129,7 @@ def _expand_using(scope: Scope, resolver: Resolver) -> t.Dict[str, t.Any]: table = columns.get(identifier) if not table or identifier not in join_columns: - if columns and join_columns: + if (columns and "*" not in columns) and join_columns: raise OptimizeError(f"Cannot automatically join: {identifier}") table = table or source_table diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 939303f..f721582 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -155,6 +155,8 @@ class Parser(metaclass=_Parser): TokenType.JSON, TokenType.JSONB, TokenType.INTERVAL, + TokenType.TINYBLOB, + TokenType.TINYTEXT, TokenType.TIME, TokenType.TIMETZ, TokenType.TIMESTAMP, @@ -764,6 +766,7 @@ class Parser(metaclass=_Parser): "ANY_VALUE": lambda self: self._parse_any_value(), "CAST": lambda self: self._parse_cast(self.STRICT_CAST), "CONCAT": lambda self: self._parse_concat(), + "CONCAT_WS": lambda self: self._parse_concat_ws(), "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), "DECODE": lambda self: self._parse_decode(), "EXTRACT": lambda self: self._parse_extract(), @@ -1942,7 +1945,7 @@ class Parser(metaclass=_Parser): def _parse_update(self) -> exp.Update: comments = self._prev_comments - this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) + this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) returning = self._parse_returning() return self.expression( @@ -3269,7 +3272,7 @@ class Parser(metaclass=_Parser): if tokens[0].token_type in self.TYPE_TOKENS: self._prev = tokens[0] elif self.SUPPORTS_USER_DEFINED_TYPES: - return identifier + return exp.DataType.build(identifier.name, udt=True) else: return None else: @@ -3888,6 +3891,9 @@ class Parser(metaclass=_Parser): exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore ) + def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: + return self._parse_field() + def _parse_primary_key( self, wrapped_optional: bool = False, in_props: bool = False ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: @@ -3899,7 +3905,9 @@ class Parser(metaclass=_Parser): if not in_props and not self._match(TokenType.L_PAREN, advance=False): return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) - expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) + expressions = self._parse_wrapped_csv( + self._parse_primary_key_part, optional=wrapped_optional + ) options = self._parse_key_constraint_options() return self.expression(exp.PrimaryKey, expressions=expressions, options=options) @@ -4066,11 +4074,7 @@ class Parser(metaclass=_Parser): def _parse_concat(self) -> t.Optional[exp.Expression]: args = self._parse_csv(self._parse_conjunction) if self.CONCAT_NULL_OUTPUTS_STRING: - args = [ - exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) - for arg in args - if arg - ] + args = self._ensure_string_if_null(args) # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when # we find such a call we replace it with its argument. @@ -4081,6 +4085,16 @@ class Parser(metaclass=_Parser): exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args ) + def _parse_concat_ws(self) -> t.Optional[exp.Expression]: + args = self._parse_csv(self._parse_conjunction) + if len(args) < 2: + return self.expression(exp.ConcatWs, expressions=args) + delim, *values = args + if self.CONCAT_NULL_OUTPUTS_STRING: + values = self._ensure_string_if_null(values) + + return self.expression(exp.ConcatWs, expressions=[delim] + values) + def _parse_string_agg(self) -> exp.Expression: if self._match(TokenType.DISTINCT): args: t.List[t.Optional[exp.Expression]] = [ @@ -4181,15 +4195,28 @@ class Parser(metaclass=_Parser): return None return self.expression(exp.JSONKeyValue, this=key, expression=value) + def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + if not this or not self._match_text_seq("FORMAT", "JSON"): + return this + + return self.expression(exp.FormatJson, this=this) + + def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: + # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) + for value in values: + if self._match_text_seq(value, "ON", on): + return f"{value} ON {on}" + + return None + def _parse_json_object(self) -> exp.JSONObject: star = self._parse_star() - expressions = [star] if star else self._parse_csv(self._parse_json_key_value) - - null_handling = None - if self._match_text_seq("NULL", "ON", "NULL"): - null_handling = "NULL ON NULL" - elif self._match_text_seq("ABSENT", "ON", "NULL"): - null_handling = "ABSENT ON NULL" + expressions = ( + [star] + if star + else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) + ) + null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") unique_keys = None if self._match_text_seq("WITH", "UNIQUE"): @@ -4199,8 +4226,9 @@ class Parser(metaclass=_Parser): self._match_text_seq("KEYS") - return_type = self._match_text_seq("RETURNING") and self._parse_type() - format_json = self._match_text_seq("FORMAT", "JSON") + return_type = self._match_text_seq("RETURNING") and self._parse_format_json( + self._parse_type() + ) encoding = self._match_text_seq("ENCODING") and self._parse_var() return self.expression( @@ -4209,7 +4237,6 @@ class Parser(metaclass=_Parser): null_handling=null_handling, unique_keys=unique_keys, return_type=return_type, - format_json=format_json, encoding=encoding, ) @@ -4979,9 +5006,12 @@ class Parser(metaclass=_Parser): self._match_r_paren() return self.expression(exp.DictRange, this=this, min=min, max=max) - def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: + def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: + index = self._index expression = self._parse_column() - self._match(TokenType.IN) + if not self._match(TokenType.IN): + self._retreat(index - 1) + return None iterator = self._parse_column() condition = self._parse_conjunction() if self._match_text_seq("IF") else None return self.expression( @@ -5125,3 +5155,10 @@ class Parser(metaclass=_Parser): else: column.replace(dot_or_id) return node + + def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: + return [ + exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) + for value in values + if value + ] diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index 3ba8195..ce255c3 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -108,6 +108,8 @@ class TokenType(AutoName): LONGTEXT = auto() MEDIUMBLOB = auto() LONGBLOB = auto() + TINYBLOB = auto() + TINYTEXT = auto() BINARY = auto() VARBINARY = auto() JSON = auto() @@ -675,6 +677,7 @@ class Tokenizer(metaclass=_Tokenizer): "BOOL": TokenType.BOOLEAN, "BOOLEAN": TokenType.BOOLEAN, "BYTE": TokenType.TINYINT, + "MEDIUMINT": TokenType.MEDIUMINT, "TINYINT": TokenType.TINYINT, "SHORT": TokenType.SMALLINT, "SMALLINT": TokenType.SMALLINT, @@ -712,10 +715,16 @@ class Tokenizer(metaclass=_Tokenizer): "STR": TokenType.TEXT, "STRING": TokenType.TEXT, "TEXT": TokenType.TEXT, + "LONGTEXT": TokenType.LONGTEXT, + "MEDIUMTEXT": TokenType.MEDIUMTEXT, + "TINYTEXT": TokenType.TINYTEXT, "CLOB": TokenType.TEXT, "LONGVARCHAR": TokenType.TEXT, "BINARY": TokenType.BINARY, "BLOB": TokenType.VARBINARY, + "LONGBLOB": TokenType.LONGBLOB, + "MEDIUMBLOB": TokenType.MEDIUMBLOB, + "TINYBLOB": TokenType.TINYBLOB, "BYTEA": TokenType.VARBINARY, "VARBINARY": TokenType.VARBINARY, "TIME": TokenType.TIME, @@ -1159,7 +1168,11 @@ class Tokenizer(metaclass=_Tokenizer): escapes = self._STRING_ESCAPES if escapes is None else escapes while True: - if self._char in escapes and (self._peek == delimiter or self._peek in escapes): + if ( + self._char in escapes + and (self._peek == delimiter or self._peek in escapes) + and (self._char not in self._QUOTES or self._char == self._peek) + ): if self._peek == delimiter: text += self._peek else: |