author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-09-13 09:17:40 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-09-13 09:17:40 +0000
commit     bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d (patch)
tree       4d46f9407b792f6fd5d767d510e6865ec9640569 /sqlglot
parent     Releasing progress-linux version 18.3.0-1. (diff)
download   sqlglot-bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d.tar.xz
           sqlglot-bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d.zip
Merging upstream version 18.4.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot')
-rw-r--r--  sqlglot/dialects/clickhouse.py          3
-rw-r--r--  sqlglot/dialects/dialect.py            16
-rw-r--r--  sqlglot/dialects/duckdb.py              1
-rw-r--r--  sqlglot/dialects/mysql.py              15
-rw-r--r--  sqlglot/dialects/oracle.py             45
-rw-r--r--  sqlglot/dialects/redshift.py            2
-rw-r--r--  sqlglot/expressions.py                 67
-rw-r--r--  sqlglot/generator.py                   56
-rw-r--r--  sqlglot/optimizer/merge_subqueries.py   2
-rw-r--r--  sqlglot/optimizer/qualify_columns.py    2
-rw-r--r--  sqlglot/parser.py                      77
-rw-r--r--  sqlglot/tokens.py                      15
12 files changed, 262 insertions, 39 deletions
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
index a38a239..4b36663 100644
--- a/sqlglot/dialects/clickhouse.py
+++ b/sqlglot/dialects/clickhouse.py
@@ -313,6 +313,8 @@ class ClickHouse(Dialect):
exp.DataType.Type.LONGTEXT: "String",
exp.DataType.Type.MEDIUMBLOB: "String",
exp.DataType.Type.MEDIUMTEXT: "String",
+ exp.DataType.Type.TINYBLOB: "String",
+ exp.DataType.Type.TINYTEXT: "String",
exp.DataType.Type.TEXT: "String",
exp.DataType.Type.VARBINARY: "String",
exp.DataType.Type.VARCHAR: "String",
@@ -331,6 +333,7 @@ class ClickHouse(Dialect):
exp.DataType.Type.FIXEDSTRING: "FixedString",
exp.DataType.Type.FLOAT: "Float32",
exp.DataType.Type.INT: "Int32",
+ exp.DataType.Type.MEDIUMINT: "Int32",
exp.DataType.Type.INT128: "Int128",
exp.DataType.Type.INT256: "Int256",
exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
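Reviewer note, not part of the patch: a minimal sketch of what these mappings enable, assuming sqlglot's public transpile API.

    import sqlglot

    # MySQL's TINYTEXT/TINYBLOB/MEDIUMINT should now map onto ClickHouse types
    # instead of falling through TYPE_MAPPING.
    print(sqlglot.transpile(
        "CREATE TABLE t (a TINYTEXT, b TINYBLOB, c MEDIUMINT)",
        read="mysql",
        write="clickhouse",
    )[0])
    # Expected along the lines of: CREATE TABLE t (a String, b String, c Int32)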
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index 1bfbfef..ff22547 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -2,6 +2,7 @@ from __future__ import annotations
import typing as t
from enum import Enum
+from functools import reduce
from sqlglot import exp
from sqlglot._typing import E
@@ -656,11 +657,18 @@ def ts_or_ds_to_date_sql(dialect: str) -> t.Callable:
def concat_to_dpipe_sql(self: Generator, expression: exp.Concat | exp.SafeConcat) -> str:
expression = expression.copy()
- this, *rest_args = expression.expressions
- for arg in rest_args:
- this = exp.DPipe(this=this, expression=arg)
+ return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
- return self.sql(this)
+
+def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
+ expression = expression.copy()
+ delim, *rest_args = expression.expressions
+ return self.sql(
+ reduce(
+ lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
+ rest_args,
+ )
+ )
def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
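Reviewer note: the reduce folds left, re-inserting the delimiter before every argument after the first. A sketch of the shape it builds, using sqlglot's expression builders:

    from functools import reduce
    from sqlglot import exp

    delim = exp.Literal.string("-")
    args = [exp.column(c) for c in ("a", "b", "c")]
    node = reduce(
        lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
        args,
    )
    print(node.sql())  # a || '-' || b || '-' || c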
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index 684e35e..bf657ed 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -291,6 +291,7 @@ class DuckDB(Dialect):
exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})",
exp.UnixToTime: rename_func("TO_TIMESTAMP"),
exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
+ exp.VariancePop: rename_func("VAR_POP"),
exp.WeekOfYear: rename_func("WEEKOFYEAR"),
}
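Reviewer note: a one-line sanity check of the new mapping (a sketch, not from the patch):

    import sqlglot

    # VARIANCE_POP parses to exp.VariancePop, which now renders as DuckDB's VAR_POP.
    print(sqlglot.transpile("SELECT VARIANCE_POP(x) FROM t", write="duckdb")[0])
    # Expected: SELECT VAR_POP(x) FROM t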
diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py
index 6327796..46e3f19 100644
--- a/sqlglot/dialects/mysql.py
+++ b/sqlglot/dialects/mysql.py
@@ -119,7 +119,7 @@ class MySQL(Dialect):
QUOTES = ["'", '"']
COMMENTS = ["--", "#", ("/*", "*/")]
IDENTIFIERS = ["`"]
- STRING_ESCAPES = ["'", "\\"]
+ STRING_ESCAPES = ["'", '"', "\\"]
BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
@@ -132,6 +132,8 @@ class MySQL(Dialect):
"LONGBLOB": TokenType.LONGBLOB,
"LONGTEXT": TokenType.LONGTEXT,
"MEDIUMBLOB": TokenType.MEDIUMBLOB,
+ "TINYBLOB": TokenType.TINYBLOB,
+ "TINYTEXT": TokenType.TINYTEXT,
"MEDIUMTEXT": TokenType.MEDIUMTEXT,
"MEDIUMINT": TokenType.MEDIUMINT,
"MEMBER OF": TokenType.MEMBER_OF,
@@ -356,6 +358,15 @@ class MySQL(Dialect):
LOG_DEFAULTS_TO_LN = True
+ def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
+ this = self._parse_id_var()
+ if not self._match(TokenType.L_PAREN):
+ return this
+
+ expression = self._parse_number()
+ self._match_r_paren()
+ return self.expression(exp.ColumnPrefix, this=this, expression=expression)
+
def _parse_index_constraint(
self, kind: t.Optional[str] = None
) -> exp.IndexColumnConstraint:
@@ -577,8 +588,10 @@ class MySQL(Dialect):
TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT)
TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT)
+ TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT)
TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB)
TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB)
+ TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB)
PROPERTIES_LOCATION = {
**generator.Generator.PROPERTIES_LOCATION,
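Reviewer note: together with the exp.ColumnPrefix node and generator method added later in this patch, MySQL key-prefix lengths should now round-trip; a hedged sketch:

    import sqlglot

    # PRIMARY KEY (name(10)) parses into the new exp.ColumnPrefix node and
    # should be emitted unchanged when writing MySQL again.
    sql = "CREATE TABLE t (name VARCHAR(255), PRIMARY KEY (name(10)))"
    print(sqlglot.transpile(sql, read="mysql", write="mysql")[0])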
diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py
index 279ed31..378df49 100644
--- a/sqlglot/dialects/oracle.py
+++ b/sqlglot/dialects/oracle.py
@@ -7,6 +7,9 @@ from sqlglot.dialects.dialect import Dialect, no_ilike_sql, rename_func, trim_sq
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
+if t.TYPE_CHECKING:
+ from sqlglot._typing import E
+
def _parse_xml_table(self: Oracle.Parser) -> exp.XMLTable:
this = self._parse_string()
@@ -69,6 +72,16 @@ class Oracle(Dialect):
FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
**parser.Parser.FUNCTION_PARSERS,
+ "JSON_ARRAY": lambda self: self._parse_json_array(
+ exp.JSONArray,
+ expressions=self._parse_csv(lambda: self._parse_format_json(self._parse_bitwise())),
+ ),
+ "JSON_ARRAYAGG": lambda self: self._parse_json_array(
+ exp.JSONArrayAgg,
+ this=self._parse_format_json(self._parse_bitwise()),
+ order=self._parse_order(),
+ ),
+ "JSON_TABLE": lambda self: self._parse_json_table(),
"XMLTABLE": _parse_xml_table,
}
@@ -82,6 +95,38 @@ class Oracle(Dialect):
# Reference: https://stackoverflow.com/a/336455
DISTINCT_TOKENS = {TokenType.DISTINCT, TokenType.UNIQUE}
+ # Note: this is currently incomplete; it only implements the "JSON_value_column" part
+ def _parse_json_column_def(self) -> exp.JSONColumnDef:
+ this = self._parse_id_var()
+ kind = self._parse_types(allow_identifiers=False)
+ path = self._match_text_seq("PATH") and self._parse_string()
+ return self.expression(exp.JSONColumnDef, this=this, kind=kind, path=path)
+
+ def _parse_json_table(self) -> exp.JSONTable:
+ this = self._parse_format_json(self._parse_bitwise())
+ path = self._match(TokenType.COMMA) and self._parse_string()
+ error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
+ empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
+ self._match(TokenType.COLUMN)
+ expressions = self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
+
+ return exp.JSONTable(
+ this=this,
+ expressions=expressions,
+ path=path,
+ error_handling=error_handling,
+ empty_handling=empty_handling,
+ )
+
+ def _parse_json_array(self, expr_type: t.Type[E], **kwargs) -> E:
+ return self.expression(
+ expr_type,
+ null_handling=self._parse_on_handling("NULL", "NULL", "ABSENT"),
+ return_type=self._match_text_seq("RETURNING") and self._parse_type(),
+ strict=self._match_text_seq("STRICT"),
+ **kwargs,
+ )
+
def _parse_column(self) -> t.Optional[exp.Expression]:
column = super()._parse_column()
if column:
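Reviewer note: a sketch of the syntax these parsers target, mirroring the JSON_TABLE shape from the Oracle docs linked above (exact output formatting not asserted here):

    import sqlglot

    # JSON_TABLE with FORMAT JSON, ON ERROR/EMPTY handling and a COLUMNS clause.
    e = sqlglot.parse_one(
        "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR "
        "NULL ON EMPTY COLUMNS (foo PATH 'bar'))",
        read="oracle",
    )
    print(e.sql(dialect="oracle"))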
diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py
index 351c5df..554cbd3 100644
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@@ -5,6 +5,7 @@ import typing as t
from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
concat_to_dpipe_sql,
+ concat_ws_to_dpipe_sql,
rename_func,
ts_or_ds_to_date_sql,
)
@@ -123,6 +124,7 @@ class Redshift(Postgres):
TRANSFORMS = {
**Postgres.Generator.TRANSFORMS,
exp.Concat: concat_to_dpipe_sql,
+ exp.ConcatWs: concat_ws_to_dpipe_sql,
exp.CurrentTimestamp: lambda self, e: "SYSDATE",
exp.DateAdd: lambda self, e: self.func(
"DATEADD", exp.var(e.text("unit") or "day"), e.expression, e.this
diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py
index 877e9fd..98afddc 100644
--- a/sqlglot/expressions.py
+++ b/sqlglot/expressions.py
@@ -20,6 +20,7 @@ import typing as t
from collections import deque
from copy import deepcopy
from enum import auto
+from functools import reduce
from sqlglot._typing import E
from sqlglot.errors import ParseError
@@ -1170,7 +1171,7 @@ class Column(Condition):
parts.append(parent.expression)
parent = parent.parent
- return Dot.build(parts)
+ return Dot.build(deepcopy(parts))
class ColumnPosition(Expression):
@@ -1537,6 +1538,10 @@ class ForeignKey(Expression):
}
+class ColumnPrefix(Expression):
+ arg_types = {"this": True, "expression": True}
+
+
class PrimaryKey(Expression):
arg_types = {"expressions": True, "options": False}
@@ -3529,6 +3534,8 @@ class DataType(Expression):
STRUCT = auto()
SUPER = auto()
TEXT = auto()
+ TINYBLOB = auto()
+ TINYTEXT = auto()
TIME = auto()
TIMETZ = auto()
TIMESTAMP = auto()
@@ -3793,13 +3800,7 @@ class Dot(Binary):
if len(expressions) < 2:
raise ValueError(f"Dot requires >= 2 expressions.")
- a, b, *expressions = expressions
- dot = Dot(this=a, expression=b)
-
- for expression in expressions:
- dot = Dot(this=dot, expression=expression)
-
- return dot
+ return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions))
class DPipe(Binary):
@@ -3959,6 +3960,13 @@ class Between(Predicate):
class Bracket(Condition):
arg_types = {"this": True, "expressions": True}
+ @property
+ def output_name(self) -> str:
+ if len(self.expressions) == 1:
+ return self.expressions[0].output_name
+
+ return super().output_name
+
class SafeBracket(Bracket):
"""Represents array lookup where OOB index yields NULL instead of causing a failure."""
@@ -4477,6 +4485,10 @@ class IsNan(Func):
_sql_names = ["IS_NAN", "ISNAN"]
+class FormatJson(Expression):
+ pass
+
+
class JSONKeyValue(Expression):
arg_types = {"this": True, "expression": True}
@@ -4487,11 +4499,48 @@ class JSONObject(Func):
"null_handling": False,
"unique_keys": False,
"return_type": False,
- "format_json": False,
"encoding": False,
}
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAY.html
+class JSONArray(Func):
+ arg_types = {
+ "expressions": True,
+ "null_handling": False,
+ "return_type": False,
+ "strict": False,
+ }
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAYAGG.html
+class JSONArrayAgg(Func):
+ arg_types = {
+ "this": True,
+ "order": False,
+ "null_handling": False,
+ "return_type": False,
+ "strict": False,
+ }
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
+# Note: parsing of JSON column definitions is currently incomplete.
+class JSONColumnDef(Expression):
+ arg_types = {"this": True, "kind": False, "path": False}
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
+class JSONTable(Func):
+ arg_types = {
+ "this": True,
+ "expressions": True,
+ "path": False,
+ "error_handling": False,
+ "empty_handling": False,
+ }
+
+
class OpenJSONColumnDef(Expression):
arg_types = {"this": True, "kind": True, "path": False, "as_json": False}
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index 1074e9a..399b48b 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -193,8 +193,10 @@ class Generator:
exp.DataType.Type.NVARCHAR: "VARCHAR",
exp.DataType.Type.MEDIUMTEXT: "TEXT",
exp.DataType.Type.LONGTEXT: "TEXT",
+ exp.DataType.Type.TINYTEXT: "TEXT",
exp.DataType.Type.MEDIUMBLOB: "BLOB",
exp.DataType.Type.LONGBLOB: "BLOB",
+ exp.DataType.Type.TINYBLOB: "BLOB",
exp.DataType.Type.INET: "INET",
}
@@ -2021,6 +2023,9 @@ class Generator:
def jsonkeyvalue_sql(self, expression: exp.JSONKeyValue) -> str:
return f"{self.sql(expression, 'this')}: {self.sql(expression, 'expression')}"
+ def formatjson_sql(self, expression: exp.FormatJson) -> str:
+ return f"{self.sql(expression, 'this')} FORMAT JSON"
+
def jsonobject_sql(self, expression: exp.JSONObject) -> str:
null_handling = expression.args.get("null_handling")
null_handling = f" {null_handling}" if null_handling else ""
@@ -2031,13 +2036,57 @@ class Generator:
unique_keys = ""
return_type = self.sql(expression, "return_type")
return_type = f" RETURNING {return_type}" if return_type else ""
- format_json = " FORMAT JSON" if expression.args.get("format_json") else ""
encoding = self.sql(expression, "encoding")
encoding = f" ENCODING {encoding}" if encoding else ""
return self.func(
"JSON_OBJECT",
*expression.expressions,
- suffix=f"{null_handling}{unique_keys}{return_type}{format_json}{encoding})",
+ suffix=f"{null_handling}{unique_keys}{return_type}{encoding})",
+ )
+
+ def jsonarray_sql(self, expression: exp.JSONArray) -> str:
+ null_handling = expression.args.get("null_handling")
+ null_handling = f" {null_handling}" if null_handling else ""
+ return_type = self.sql(expression, "return_type")
+ return_type = f" RETURNING {return_type}" if return_type else ""
+ strict = " STRICT" if expression.args.get("strict") else ""
+ return self.func(
+ "JSON_ARRAY", *expression.expressions, suffix=f"{null_handling}{return_type}{strict})"
+ )
+
+ def jsonarrayagg_sql(self, expression: exp.JSONArrayAgg) -> str:
+ this = self.sql(expression, "this")
+ order = self.sql(expression, "order")
+ null_handling = expression.args.get("null_handling")
+ null_handling = f" {null_handling}" if null_handling else ""
+ return_type = self.sql(expression, "return_type")
+ return_type = f" RETURNING {return_type}" if return_type else ""
+ strict = " STRICT" if expression.args.get("strict") else ""
+ return self.func(
+ "JSON_ARRAYAGG",
+ this,
+ suffix=f"{order}{null_handling}{return_type}{strict})",
+ )
+
+ def jsoncolumndef_sql(self, expression: exp.JSONColumnDef) -> str:
+ this = self.sql(expression, "this")
+ kind = self.sql(expression, "kind")
+ kind = f" {kind}" if kind else ""
+ path = self.sql(expression, "path")
+ path = f" PATH {path}" if path else ""
+ return f"{this}{kind}{path}"
+
+ def jsontable_sql(self, expression: exp.JSONTable) -> str:
+ this = self.sql(expression, "this")
+ path = self.sql(expression, "path")
+ path = f", {path}" if path else ""
+ error_handling = expression.args.get("error_handling")
+ error_handling = f" {error_handling}" if error_handling else ""
+ empty_handling = expression.args.get("empty_handling")
+ empty_handling = f" {empty_handling}" if empty_handling else ""
+ columns = f" COLUMNS ({self.expressions(expression, skip_first=True)})"
+ return self.func(
+ "JSON_TABLE", this, suffix=f"{path}{error_handling}{empty_handling}{columns})"
)
def openjsoncolumndef_sql(self, expression: exp.OpenJSONColumnDef) -> str:
@@ -2722,6 +2771,9 @@ class Generator:
condition = f" IF {condition}" if condition else ""
return f"{this} FOR {expr} IN {iterator}{condition}"
+ def columnprefix_sql(self, expression: exp.ColumnPrefix) -> str:
+ return f"{self.sql(expression, 'this')}({self.sql(expression, 'expression')})"
+
def cached_generator(
cache: t.Optional[t.Dict[int, str]] = None
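Reviewer note: a minimal sketch of the new FormatJson rendering, building the node directly (hypothetical usage, not from the patch):

    from sqlglot import exp

    # FormatJson wraps an operand and renders it followed by FORMAT JSON.
    node = exp.FormatJson(this=exp.column("payload"))
    print(node.sql())  # payload FORMAT JSON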
diff --git a/sqlglot/optimizer/merge_subqueries.py b/sqlglot/optimizer/merge_subqueries.py
index 7322424..976c9ad 100644
--- a/sqlglot/optimizer/merge_subqueries.py
+++ b/sqlglot/optimizer/merge_subqueries.py
@@ -128,7 +128,7 @@ def _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
def _is_a_window_expression_in_unmergable_operation():
window_expressions = inner_select.find_all(exp.Window)
window_alias_names = {window.parent.alias_or_name for window in window_expressions}
- inner_select_name = inner_select.parent.alias_or_name
+ inner_select_name = from_or_join.alias_or_name
unmergable_window_columns = [
column
for column in outer_scope.columns
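Reviewer note: the fix matters when the inner query is attached under an alias that differs from its defining name, e.g. a CTE joined under a new alias; the mergeability check must key off the join's alias, not inner_select.parent. An illustrative sketch with a hypothetical schema:

    from sqlglot.optimizer import optimize

    # "t" defines the window expression, but it is joined as "t2".
    sql = """
    WITH t AS (SELECT x, ROW_NUMBER() OVER (PARTITION BY x) AS rn FROM tbl)
    SELECT t2.rn FROM tbl JOIN t AS t2 ON tbl.x = t2.x
    """
    print(optimize(sql, schema={"tbl": {"x": "int"}}).sql())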
diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
index 952999d..b06ea1d 100644
--- a/sqlglot/optimizer/qualify_columns.py
+++ b/sqlglot/optimizer/qualify_columns.py
@@ -129,7 +129,7 @@ def _expand_using(scope: Scope, resolver: Resolver) -> t.Dict[str, t.Any]:
table = columns.get(identifier)
if not table or identifier not in join_columns:
- if columns and join_columns:
+ if (columns and "*" not in columns) and join_columns:
raise OptimizeError(f"Cannot automatically join: {identifier}")
table = table or source_table
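Reviewer note: the guard now tolerates an unexpandable star on one side of a USING join instead of raising; an illustrative sketch (no schema, so the star cannot be expanded):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    e = sqlglot.parse_one("SELECT x.b FROM (SELECT * FROM t1) AS x JOIN t2 USING (b)")
    # Previously this could raise OptimizeError("Cannot automatically join: b").
    print(qualify(e, validate_qualify_columns=False).sql())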
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 939303f..f721582 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -155,6 +155,8 @@ class Parser(metaclass=_Parser):
TokenType.JSON,
TokenType.JSONB,
TokenType.INTERVAL,
+ TokenType.TINYBLOB,
+ TokenType.TINYTEXT,
TokenType.TIME,
TokenType.TIMETZ,
TokenType.TIMESTAMP,
@@ -764,6 +766,7 @@ class Parser(metaclass=_Parser):
"ANY_VALUE": lambda self: self._parse_any_value(),
"CAST": lambda self: self._parse_cast(self.STRICT_CAST),
"CONCAT": lambda self: self._parse_concat(),
+ "CONCAT_WS": lambda self: self._parse_concat_ws(),
"CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
"DECODE": lambda self: self._parse_decode(),
"EXTRACT": lambda self: self._parse_extract(),
@@ -1942,7 +1945,7 @@ class Parser(metaclass=_Parser):
def _parse_update(self) -> exp.Update:
comments = self._prev_comments
- this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
+ this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
returning = self._parse_returning()
return self.expression(
@@ -3269,7 +3272,7 @@ class Parser(metaclass=_Parser):
if tokens[0].token_type in self.TYPE_TOKENS:
self._prev = tokens[0]
elif self.SUPPORTS_USER_DEFINED_TYPES:
- return identifier
+ return exp.DataType.build(identifier.name, udt=True)
else:
return None
else:
@@ -3888,6 +3891,9 @@ class Parser(metaclass=_Parser):
exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore
)
+ def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
+ return self._parse_field()
+
def _parse_primary_key(
self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
@@ -3899,7 +3905,9 @@ class Parser(metaclass=_Parser):
if not in_props and not self._match(TokenType.L_PAREN, advance=False):
return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
- expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
+ expressions = self._parse_wrapped_csv(
+ self._parse_primary_key_part, optional=wrapped_optional
+ )
options = self._parse_key_constraint_options()
return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
@@ -4066,11 +4074,7 @@ class Parser(metaclass=_Parser):
def _parse_concat(self) -> t.Optional[exp.Expression]:
args = self._parse_csv(self._parse_conjunction)
if self.CONCAT_NULL_OUTPUTS_STRING:
- args = [
- exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
- for arg in args
- if arg
- ]
+ args = self._ensure_string_if_null(args)
# Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
# we find such a call we replace it with its argument.
@@ -4081,6 +4085,16 @@ class Parser(metaclass=_Parser):
exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
)
+ def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
+ args = self._parse_csv(self._parse_conjunction)
+ if len(args) < 2:
+ return self.expression(exp.ConcatWs, expressions=args)
+ delim, *values = args
+ if self.CONCAT_NULL_OUTPUTS_STRING:
+ values = self._ensure_string_if_null(values)
+
+ return self.expression(exp.ConcatWs, expressions=[delim] + values)
+
def _parse_string_agg(self) -> exp.Expression:
if self._match(TokenType.DISTINCT):
args: t.List[t.Optional[exp.Expression]] = [
@@ -4181,15 +4195,28 @@ class Parser(metaclass=_Parser):
return None
return self.expression(exp.JSONKeyValue, this=key, expression=value)
+ def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ if not this or not self._match_text_seq("FORMAT", "JSON"):
+ return this
+
+ return self.expression(exp.FormatJson, this=this)
+
+ def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
+ # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
+ for value in values:
+ if self._match_text_seq(value, "ON", on):
+ return f"{value} ON {on}"
+
+ return None
+
def _parse_json_object(self) -> exp.JSONObject:
star = self._parse_star()
- expressions = [star] if star else self._parse_csv(self._parse_json_key_value)
-
- null_handling = None
- if self._match_text_seq("NULL", "ON", "NULL"):
- null_handling = "NULL ON NULL"
- elif self._match_text_seq("ABSENT", "ON", "NULL"):
- null_handling = "ABSENT ON NULL"
+ expressions = (
+ [star]
+ if star
+ else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
+ )
+ null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")
unique_keys = None
if self._match_text_seq("WITH", "UNIQUE"):
@@ -4199,8 +4226,9 @@ class Parser(metaclass=_Parser):
self._match_text_seq("KEYS")
- return_type = self._match_text_seq("RETURNING") and self._parse_type()
- format_json = self._match_text_seq("FORMAT", "JSON")
+ return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
+ self._parse_type()
+ )
encoding = self._match_text_seq("ENCODING") and self._parse_var()
return self.expression(
@@ -4209,7 +4237,6 @@ class Parser(metaclass=_Parser):
null_handling=null_handling,
unique_keys=unique_keys,
return_type=return_type,
- format_json=format_json,
encoding=encoding,
)
@@ -4979,9 +5006,12 @@ class Parser(metaclass=_Parser):
self._match_r_paren()
return self.expression(exp.DictRange, this=this, min=min, max=max)
- def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
+ def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
+ index = self._index
expression = self._parse_column()
- self._match(TokenType.IN)
+ if not self._match(TokenType.IN):
+ self._retreat(index - 1)
+ return None
iterator = self._parse_column()
condition = self._parse_conjunction() if self._match_text_seq("IF") else None
return self.expression(
@@ -5125,3 +5155,10 @@ class Parser(metaclass=_Parser):
else:
column.replace(dot_or_id)
return node
+
+ def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
+ return [
+ exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
+ for value in values
+ if value
+ ]
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 3ba8195..ce255c3 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -108,6 +108,8 @@ class TokenType(AutoName):
LONGTEXT = auto()
MEDIUMBLOB = auto()
LONGBLOB = auto()
+ TINYBLOB = auto()
+ TINYTEXT = auto()
BINARY = auto()
VARBINARY = auto()
JSON = auto()
@@ -675,6 +677,7 @@ class Tokenizer(metaclass=_Tokenizer):
"BOOL": TokenType.BOOLEAN,
"BOOLEAN": TokenType.BOOLEAN,
"BYTE": TokenType.TINYINT,
+ "MEDIUMINT": TokenType.MEDIUMINT,
"TINYINT": TokenType.TINYINT,
"SHORT": TokenType.SMALLINT,
"SMALLINT": TokenType.SMALLINT,
@@ -712,10 +715,16 @@ class Tokenizer(metaclass=_Tokenizer):
"STR": TokenType.TEXT,
"STRING": TokenType.TEXT,
"TEXT": TokenType.TEXT,
+ "LONGTEXT": TokenType.LONGTEXT,
+ "MEDIUMTEXT": TokenType.MEDIUMTEXT,
+ "TINYTEXT": TokenType.TINYTEXT,
"CLOB": TokenType.TEXT,
"LONGVARCHAR": TokenType.TEXT,
"BINARY": TokenType.BINARY,
"BLOB": TokenType.VARBINARY,
+ "LONGBLOB": TokenType.LONGBLOB,
+ "MEDIUMBLOB": TokenType.MEDIUMBLOB,
+ "TINYBLOB": TokenType.TINYBLOB,
"BYTEA": TokenType.VARBINARY,
"VARBINARY": TokenType.VARBINARY,
"TIME": TokenType.TIME,
@@ -1159,7 +1168,11 @@ class Tokenizer(metaclass=_Tokenizer):
escapes = self._STRING_ESCAPES if escapes is None else escapes
while True:
- if self._char in escapes and (self._peek == delimiter or self._peek in escapes):
+ if (
+ self._char in escapes
+ and (self._peek == delimiter or self._peek in escapes)
+ and (self._char not in self._QUOTES or self._char == self._peek)
+ ):
if self._peek == delimiter:
text += self._peek
else:
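Reviewer note: the extra condition makes a quote character act as an escape only when it doubles itself, so backslash escapes keep working while a lone quote inside a differently-quoted string is left alone. A sketch on MySQL, where both ' and " delimit strings:

    import sqlglot

    # "" inside a double-quoted MySQL string collapses to one quote character,
    # while a lone " inside a single-quoted string is preserved as-is.
    print(sqlglot.transpile('SELECT "a""b"', read="mysql")[0])  # SELECT 'a"b'
    print(sqlglot.transpile("SELECT 'a\"b'", read="mysql")[0])  # SELECT 'a"b'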