author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-09-13 09:17:40 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-09-13 09:17:40 +0000
commit     bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d (patch)
tree       4d46f9407b792f6fd5d767d510e6865ec9640569 /sqlglot
parent     Releasing progress-linux version 18.3.0-1. (diff)
download   sqlglot-bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d.tar.xz
           sqlglot-bdf5cc7bdd5ec93dc928d81e286f7b1e678ba19d.zip
Merging upstream version 18.4.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot')
-rw-r--r--  sqlglot/dialects/clickhouse.py          3
-rw-r--r--  sqlglot/dialects/dialect.py            16
-rw-r--r--  sqlglot/dialects/duckdb.py              1
-rw-r--r--  sqlglot/dialects/mysql.py              15
-rw-r--r--  sqlglot/dialects/oracle.py             45
-rw-r--r--  sqlglot/dialects/redshift.py            2
-rw-r--r--  sqlglot/expressions.py                 67
-rw-r--r--  sqlglot/generator.py                   56
-rw-r--r--  sqlglot/optimizer/merge_subqueries.py   2
-rw-r--r--  sqlglot/optimizer/qualify_columns.py    2
-rw-r--r--  sqlglot/parser.py                      77
-rw-r--r--  sqlglot/tokens.py                      15
12 files changed, 262 insertions, 39 deletions
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
index a38a239..4b36663 100644
--- a/sqlglot/dialects/clickhouse.py
+++ b/sqlglot/dialects/clickhouse.py
@@ -313,6 +313,8 @@ class ClickHouse(Dialect):
exp.DataType.Type.LONGTEXT: "String",
exp.DataType.Type.MEDIUMBLOB: "String",
exp.DataType.Type.MEDIUMTEXT: "String",
+ exp.DataType.Type.TINYBLOB: "String",
+ exp.DataType.Type.TINYTEXT: "String",
exp.DataType.Type.TEXT: "String",
exp.DataType.Type.VARBINARY: "String",
exp.DataType.Type.VARCHAR: "String",
@@ -331,6 +333,7 @@ class ClickHouse(Dialect):
exp.DataType.Type.FIXEDSTRING: "FixedString",
exp.DataType.Type.FLOAT: "Float32",
exp.DataType.Type.INT: "Int32",
+ exp.DataType.Type.MEDIUMINT: "Int32",
exp.DataType.Type.INT128: "Int128",
exp.DataType.Type.INT256: "Int256",
exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
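Reviewer note, not part of the patch: a minimal sketch of what these mappings enable, assuming sqlglot's public transpile API.

    import sqlglot

    # MySQL's TINYTEXT/TINYBLOB/MEDIUMINT should now map onto ClickHouse types
    # instead of falling through TYPE_MAPPING.
    print(sqlglot.transpile(
        "CREATE TABLE t (a TINYTEXT, b TINYBLOB, c MEDIUMINT)",
        read="mysql",
        write="clickhouse",
    )[0])
    # Expected along the lines of: CREATE TABLE t (a String, b String, c Int32)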
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index 1bfbfef..ff22547 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -2,6 +2,7 @@ from __future__ import annotations
import typing as t
from enum import Enum
+from functools import reduce
from sqlglot import exp
from sqlglot._typing import E
@@ -656,11 +657,18 @@ def ts_or_ds_to_date_sql(dialect: str) -> t.Callable:
def concat_to_dpipe_sql(self: Generator, expression: exp.Concat | exp.SafeConcat) -> str:
expression = expression.copy()
- this, *rest_args = expression.expressions
- for arg in rest_args:
- this = exp.DPipe(this=this, expression=arg)
+ return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
- return self.sql(this)
+
+def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
+ expression = expression.copy()
+ delim, *rest_args = expression.expressions
+ return self.sql(
+ reduce(
+ lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
+ rest_args,
+ )
+ )
def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
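Reviewer note: the reduce folds left, re-inserting the delimiter before every argument after the first. A sketch of the shape it builds, using sqlglot's expression builders:

    from functools import reduce
    from sqlglot import exp

    delim = exp.Literal.string("-")
    args = [exp.column(c) for c in ("a", "b", "c")]
    node = reduce(
        lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
        args,
    )
    print(node.sql())  # a || '-' || b || '-' || c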
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index 684e35e..bf657ed 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -291,6 +291,7 @@ class DuckDB(Dialect):
exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})",
exp.UnixToTime: rename_func("TO_TIMESTAMP"),
exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
+ exp.VariancePop: rename_func("VAR_POP"),
exp.WeekOfYear: rename_func("WEEKOFYEAR"),
}
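Reviewer note: a one-line sanity check of the new mapping (a sketch, not from the patch):

    import sqlglot

    # VARIANCE_POP parses to exp.VariancePop, which now renders as DuckDB's VAR_POP.
    print(sqlglot.transpile("SELECT VARIANCE_POP(x) FROM t", write="duckdb")[0])
    # Expected: SELECT VAR_POP(x) FROM t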
diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py
index 6327796..46e3f19 100644
--- a/sqlglot/dialects/mysql.py
+++ b/sqlglot/dialects/mysql.py
@@ -119,7 +119,7 @@ class MySQL(Dialect):
QUOTES = ["'", '"']
COMMENTS = ["--", "#", ("/*", "*/")]
IDENTIFIERS = ["`"]
- STRING_ESCAPES = ["'", "\\"]
+ STRING_ESCAPES = ["'", '"', "\\"]
BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
@@ -132,6 +132,8 @@ class MySQL(Dialect):
"LONGBLOB": TokenType.LONGBLOB,
"LONGTEXT": TokenType.LONGTEXT,
"MEDIUMBLOB": TokenType.MEDIUMBLOB,
+ "TINYBLOB": TokenType.TINYBLOB,
+ "TINYTEXT": TokenType.TINYTEXT,
"MEDIUMTEXT": TokenType.MEDIUMTEXT,
"MEDIUMINT": TokenType.MEDIUMINT,
"MEMBER OF": TokenType.MEMBER_OF,
@@ -356,6 +358,15 @@ class MySQL(Dialect):
LOG_DEFAULTS_TO_LN = True
+ def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
+ this = self._parse_id_var()
+ if not self._match(TokenType.L_PAREN):
+ return this
+
+ expression = self._parse_number()
+ self._match_r_paren()
+ return self.expression(exp.ColumnPrefix, this=this, expression=expression)
+
def _parse_index_constraint(
self, kind: t.Optional[str] = None
) -> exp.IndexColumnConstraint:
@@ -577,8 +588,10 @@ class MySQL(Dialect):
TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT)
TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT)
+ TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT)
TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB)
TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB)
+ TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB)
PROPERTIES_LOCATION = {
**generator.Generator.PROPERTIES_LOCATION,
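Reviewer note: together with the exp.ColumnPrefix node and generator method added later in this patch, MySQL key-prefix lengths should now round-trip; a hedged sketch:

    import sqlglot

    # PRIMARY KEY (name(10)) parses into the new exp.ColumnPrefix node and
    # should be emitted unchanged when writing MySQL again.
    sql = "CREATE TABLE t (name VARCHAR(255), PRIMARY KEY (name(10)))"
    print(sqlglot.transpile(sql, read="mysql", write="mysql")[0])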
diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py
index 279ed31..378df49 100644
--- a/sqlglot/dialects/oracle.py
+++ b/sqlglot/dialects/oracle.py
@@ -7,6 +7,9 @@ from sqlglot.dialects.dialect import Dialect, no_ilike_sql, rename_func, trim_sq
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
+if t.TYPE_CHECKING:
+ from sqlglot._typing import E
+
def _parse_xml_table(self: Oracle.Parser) -> exp.XMLTable:
this = self._parse_string()
@@ -69,6 +72,16 @@ class Oracle(Dialect):
FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
**parser.Parser.FUNCTION_PARSERS,
+ "JSON_ARRAY": lambda self: self._parse_json_array(
+ exp.JSONArray,
+ expressions=self._parse_csv(lambda: self._parse_format_json(self._parse_bitwise())),
+ ),
+ "JSON_ARRAYAGG": lambda self: self._parse_json_array(
+ exp.JSONArrayAgg,
+ this=self._parse_format_json(self._parse_bitwise()),
+ order=self._parse_order(),
+ ),
+ "JSON_TABLE": lambda self: self._parse_json_table(),
"XMLTABLE": _parse_xml_table,
}
@@ -82,6 +95,38 @@ class Oracle(Dialect):
# Reference: https://stackoverflow.com/a/336455
DISTINCT_TOKENS = {TokenType.DISTINCT, TokenType.UNIQUE}
+ # Note: this is currently incomplete; it only implements the "JSON_value_column" part
+ def _parse_json_column_def(self) -> exp.JSONColumnDef:
+ this = self._parse_id_var()
+ kind = self._parse_types(allow_identifiers=False)
+ path = self._match_text_seq("PATH") and self._parse_string()
+ return self.expression(exp.JSONColumnDef, this=this, kind=kind, path=path)
+
+ def _parse_json_table(self) -> exp.JSONTable:
+ this = self._parse_format_json(self._parse_bitwise())
+ path = self._match(TokenType.COMMA) and self._parse_string()
+ error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
+ empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
+ self._match(TokenType.COLUMN)
+ expressions = self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
+
+ return exp.JSONTable(
+ this=this,
+ expressions=expressions,
+ path=path,
+ error_handling=error_handling,
+ empty_handling=empty_handling,
+ )
+
+ def _parse_json_array(self, expr_type: t.Type[E], **kwargs) -> E:
+ return self.expression(
+ expr_type,
+ null_handling=self._parse_on_handling("NULL", "NULL", "ABSENT"),
+ return_type=self._match_text_seq("RETURNING") and self._parse_type(),
+ strict=self._match_text_seq("STRICT"),
+ **kwargs,
+ )
+
def _parse_column(self) -> t.Optional[exp.Expression]:
column = super()._parse_column()
if column:
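Reviewer note: a sketch of the syntax these parsers target, mirroring the JSON_TABLE shape from the Oracle docs linked above (exact output formatting not asserted here):

    import sqlglot

    # JSON_TABLE with FORMAT JSON, ON ERROR/EMPTY handling and a COLUMNS clause.
    e = sqlglot.parse_one(
        "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR "
        "NULL ON EMPTY COLUMNS (foo PATH 'bar'))",
        read="oracle",
    )
    print(e.sql(dialect="oracle"))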
diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py
index 351c5df..554cbd3 100644
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@@ -5,6 +5,7 @@ import typing as t
from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
concat_to_dpipe_sql,
+ concat_ws_to_dpipe_sql,
rename_func,
ts_or_ds_to_date_sql,
)
@@ -123,6 +124,7 @@ class Redshift(Postgres):
TRANSFORMS = {
**Postgres.Generator.TRANSFORMS,
exp.Concat: concat_to_dpipe_sql,
+ exp.ConcatWs: concat_ws_to_dpipe_sql,
exp.CurrentTimestamp: lambda self, e: "SYSDATE",
exp.DateAdd: lambda self, e: self.func(
"DATEADD", exp.var(e.text("unit") or "day"), e.expression, e.this
diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py
index 877e9fd..98afddc 100644
--- a/sqlglot/expressions.py
+++ b/sqlglot/expressions.py
@@ -20,6 +20,7 @@ import typing as t
from collections import deque
from copy import deepcopy
from enum import auto
+from functools import reduce
from sqlglot._typing import E
from sqlglot.errors import ParseError
@@ -1170,7 +1171,7 @@ class Column(Condition):
parts.append(parent.expression)
parent = parent.parent
- return Dot.build(parts)
+ return Dot.build(deepcopy(parts))
class ColumnPosition(Expression):
@@ -1537,6 +1538,10 @@ class ForeignKey(Expression):
}
+class ColumnPrefix(Expression):
+ arg_types = {"this": True, "expression": True}
+
+
class PrimaryKey(Expression):
arg_types = {"expressions": True, "options": False}
@@ -3529,6 +3534,8 @@ class DataType(Expression):
STRUCT = auto()
SUPER = auto()
TEXT = auto()
+ TINYBLOB = auto()
+ TINYTEXT = auto()
TIME = auto()
TIMETZ = auto()
TIMESTAMP = auto()
@@ -3793,13 +3800,7 @@ class Dot(Binary):
if len(expressions) < 2:
raise ValueError(f"Dot requires >= 2 expressions.")
- a, b, *expressions = expressions
- dot = Dot(this=a, expression=b)
-
- for expression in expressions:
- dot = Dot(this=dot, expression=expression)
-
- return dot
+ return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions))
class DPipe(Binary):
@@ -3959,6 +3960,13 @@ class Between(Predicate):
class Bracket(Condition):
arg_types = {"this": True, "expressions": True}
+ @property
+ def output_name(self) -> str:
+ if len(self.expressions) == 1:
+ return self.expressions[0].output_name
+
+ return super().output_name
+
class SafeBracket(Bracket):
"""Represents array lookup where OOB index yields NULL instead of causing a failure."""
@@ -4477,6 +4485,10 @@ class IsNan(Func):
_sql_names = ["IS_NAN", "ISNAN"]
+class FormatJson(Expression):
+ pass
+
+
class JSONKeyValue(Expression):
arg_types = {"this": True, "expression": True}
@@ -4487,11 +4499,48 @@ class JSONObject(Func):
"null_handling": False,
"unique_keys": False,
"return_type": False,
- "format_json": False,
"encoding": False,
}
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAY.html
+class JSONArray(Func):
+ arg_types = {
+ "expressions": True,
+ "null_handling": False,
+ "return_type": False,
+ "strict": False,
+ }
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAYAGG.html
+class JSONArrayAgg(Func):
+ arg_types = {
+ "this": True,
+ "order": False,
+ "null_handling": False,
+ "return_type": False,
+ "strict": False,
+ }
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
+# Note: parsing of JSON column definitions is currently incomplete.
+class JSONColumnDef(Expression):
+ arg_types = {"this": True, "kind": False, "path": False}
+
+
+# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
+class JSONTable(Func):
+ arg_types = {
+ "this": True,
+ "expressions": True,
+ "path": False,
+ "error_handling": False,
+ "empty_handling": False,
+ }
+
+
class OpenJSONColumnDef(Expression):
arg_types = {"this": True, "kind": True, "path": False, "as_json": False}
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index 1074e9a..399b48b 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -193,8 +193,10 @@ class Generator:
exp.DataType.Type.NVARCHAR: "VARCHAR",
exp.DataType.Type.MEDIUMTEXT: "TEXT",
exp.DataType.Type.LONGTEXT: "TEXT",
+ exp.DataType.Type.TINYTEXT: "TEXT",
exp.DataType.Type.MEDIUMBLOB: "BLOB",
exp.DataType.Type.LONGBLOB: "BLOB",
+ exp.DataType.Type.TINYBLOB: "BLOB",
exp.DataType.Type.INET: "INET",
}
@@ -2021,6 +2023,9 @@ class Generator:
def jsonkeyvalue_sql(self, expression: exp.JSONKeyValue) -> str:
return f"{self.sql(expression, 'this')}: {self.sql(expression, 'expression')}"
+ def formatjson_sql(self, expression: exp.FormatJson) -> str:
+ return f"{self.sql(expression, 'this')} FORMAT JSON"
+
def jsonobject_sql(self, expression: exp.JSONObject) -> str:
null_handling = expression.args.get("null_handling")
null_handling = f" {null_handling}" if null_handling else ""
@@ -2031,13 +2036,57 @@ class Generator:
unique_keys = ""
return_type = self.sql(expression, "return_type")
return_type = f" RETURNING {return_type}" if return_type else ""
- format_json = " FORMAT JSON" if expression.args.get("format_json") else ""
encoding = self.sql(expression, "encoding")
encoding = f" ENCODING {encoding}" if encoding else ""
return self.func(
"JSON_OBJECT",
*expression.expressions,
- suffix=f"{null_handling}{unique_keys}{return_type}{format_json}{encoding})",
+ suffix=f"{null_handling}{unique_keys}{return_type}{encoding})",
+ )
+
+ def jsonarray_sql(self, expression: exp.JSONArray) -> str:
+ null_handling = expression.args.get("null_handling")
+ null_handling = f" {null_handling}" if null_handling else ""
+ return_type = self.sql(expression, "return_type")
+ return_type = f" RETURNING {return_type}" if return_type else ""
+ strict = " STRICT" if expression.args.get("strict") else ""
+ return self.func(
+ "JSON_ARRAY", *expression.expressions, suffix=f"{null_handling}{return_type}{strict})"
+ )
+
+ def jsonarrayagg_sql(self, expression: exp.JSONArrayAgg) -> str:
+ this = self.sql(expression, "this")
+ order = self.sql(expression, "order")
+ null_handling = expression.args.get("null_handling")
+ null_handling = f" {null_handling}" if null_handling else ""
+ return_type = self.sql(expression, "return_type")
+ return_type = f" RETURNING {return_type}" if return_type else ""
+ strict = " STRICT" if expression.args.get("strict") else ""
+ return self.func(
+ "JSON_ARRAYAGG",
+ this,
+ suffix=f"{order}{null_handling}{return_type}{strict})",
+ )
+
+ def jsoncolumndef_sql(self, expression: exp.JSONColumnDef) -> str:
+ this = self.sql(expression, "this")
+ kind = self.sql(expression, "kind")
+ kind = f" {kind}" if kind else ""
+ path = self.sql(expression, "path")
+ path = f" PATH {path}" if path else ""
+ return f"{this}{kind}{path}"
+
+ def jsontable_sql(self, expression: exp.JSONTable) -> str:
+ this = self.sql(expression, "this")
+ path = self.sql(expression, "path")
+ path = f", {path}" if path else ""
+ error_handling = expression.args.get("error_handling")
+ error_handling = f" {error_handling}" if error_handling else ""
+ empty_handling = expression.args.get("empty_handling")
+ empty_handling = f" {empty_handling}" if empty_handling else ""
+ columns = f" COLUMNS ({self.expressions(expression, skip_first=True)})"
+ return self.func(
+ "JSON_TABLE", this, suffix=f"{path}{error_handling}{empty_handling}{columns})"
)
def openjsoncolumndef_sql(self, expression: exp.OpenJSONColumnDef) -> str:
@@ -2722,6 +2771,9 @@ class Generator:
condition = f" IF {condition}" if condition else ""
return f"{this} FOR {expr} IN {iterator}{condition}"
+ def columnprefix_sql(self, expression: exp.ColumnPrefix) -> str:
+ return f"{self.sql(expression, 'this')}({self.sql(expression, 'expression')})"
+
def cached_generator(
cache: t.Optional[t.Dict[int, str]] = None
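Reviewer note: a minimal sketch of the new FormatJson rendering, building the node directly (hypothetical usage, not from the patch):

    from sqlglot import exp

    # FormatJson wraps an operand and renders it followed by FORMAT JSON.
    node = exp.FormatJson(this=exp.column("payload"))
    print(node.sql())  # payload FORMAT JSON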
diff --git a/sqlglot/optimizer/merge_subqueries.py b/sqlglot/optimizer/merge_subqueries.py
index 7322424..976c9ad 100644
--- a/sqlglot/optimizer/merge_subqueries.py
+++ b/sqlglot/optimizer/merge_subqueries.py
@@ -128,7 +128,7 @@ def _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
def _is_a_window_expression_in_unmergable_operation():
window_expressions = inner_select.find_all(exp.Window)
window_alias_names = {window.parent.alias_or_name for window in window_expressions}
- inner_select_name = inner_select.parent.alias_or_name
+ inner_select_name = from_or_join.alias_or_name
unmergable_window_columns = [
column
for column in outer_scope.columns
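Reviewer note: the fix matters when the inner query is attached under an alias that differs from its defining name, e.g. a CTE joined under a new alias; the mergeability check must key off the join's alias, not inner_select.parent. An illustrative sketch with a hypothetical schema:

    from sqlglot.optimizer import optimize

    # "t" defines the window expression, but it is joined as "t2".
    sql = """
    WITH t AS (SELECT x, ROW_NUMBER() OVER (PARTITION BY x) AS rn FROM tbl)
    SELECT t2.rn FROM tbl JOIN t AS t2 ON tbl.x = t2.x
    """
    print(optimize(sql, schema={"tbl": {"x": "int"}}).sql())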
diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
index 952999d..b06ea1d 100644
--- a/sqlglot/optimizer/qualify_columns.py
+++ b/sqlglot/optimizer/qualify_columns.py
@@ -129,7 +129,7 @@ def _expand_using(scope: Scope, resolver: Resolver) -> t.Dict[str, t.Any]:
table = columns.get(identifier)
if not table or identifier not in join_columns:
- if columns and join_columns:
+ if (columns and "*" not in columns) and join_columns:
raise OptimizeError(f"Cannot automatically join: {identifier}")
table = table or source_table
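Reviewer note: the guard now tolerates an unexpandable star on one side of a USING join instead of raising; an illustrative sketch (no schema, so the star cannot be expanded):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    e = sqlglot.parse_one("SELECT x.b FROM (SELECT * FROM t1) AS x JOIN t2 USING (b)")
    # Previously this could raise OptimizeError("Cannot automatically join: b").
    print(qualify(e, validate_qualify_columns=False).sql())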
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 939303f..f721582 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -155,6 +155,8 @@ class Parser(metaclass=_Parser):
TokenType.JSON,
TokenType.JSONB,
TokenType.INTERVAL,
+ TokenType.TINYBLOB,
+ TokenType.TINYTEXT,
TokenType.TIME,
TokenType.TIMETZ,
TokenType.TIMESTAMP,
@@ -764,6 +766,7 @@ class Parser(metaclass=_Parser):
"ANY_VALUE": lambda self: self._parse_any_value(),
"CAST": lambda self: self._parse_cast(self.STRICT_CAST),
"CONCAT": lambda self: self._parse_concat(),
+ "CONCAT_WS": lambda self: self._parse_concat_ws(),
"CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
"DECODE": lambda self: self._parse_decode(),
"EXTRACT": lambda self: self._parse_extract(),
@@ -1942,7 +1945,7 @@ class Parser(metaclass=_Parser):
def _parse_update(self) -> exp.Update:
comments = self._prev_comments
- this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
+ this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
returning = self._parse_returning()
return self.expression(
@@ -3269,7 +3272,7 @@ class Parser(metaclass=_Parser):
if tokens[0].token_type in self.TYPE_TOKENS:
self._prev = tokens[0]
elif self.SUPPORTS_USER_DEFINED_TYPES:
- return identifier
+ return exp.DataType.build(identifier.name, udt=True)
else:
return None
else:
@@ -3888,6 +3891,9 @@ class Parser(metaclass=_Parser):
exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore
)
+ def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
+ return self._parse_field()
+
def _parse_primary_key(
self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
@@ -3899,7 +3905,9 @@ class Parser(metaclass=_Parser):
if not in_props and not self._match(TokenType.L_PAREN, advance=False):
return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
- expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
+ expressions = self._parse_wrapped_csv(
+ self._parse_primary_key_part, optional=wrapped_optional
+ )
options = self._parse_key_constraint_options()
return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
@@ -4066,11 +4074,7 @@ class Parser(metaclass=_Parser):
def _parse_concat(self) -> t.Optional[exp.Expression]:
args = self._parse_csv(self._parse_conjunction)
if self.CONCAT_NULL_OUTPUTS_STRING:
- args = [
- exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
- for arg in args
- if arg
- ]
+ args = self._ensure_string_if_null(args)
# Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
# we find such a call we replace it with its argument.
@@ -4081,6 +4085,16 @@ class Parser(metaclass=_Parser):
exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
)
+ def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
+ args = self._parse_csv(self._parse_conjunction)
+ if len(args) < 2:
+ return self.expression(exp.ConcatWs, expressions=args)
+ delim, *values = args
+ if self.CONCAT_NULL_OUTPUTS_STRING:
+ values = self._ensure_string_if_null(values)
+
+ return self.expression(exp.ConcatWs, expressions=[delim] + values)
+
def _parse_string_agg(self) -> exp.Expression:
if self._match(TokenType.DISTINCT):
args: t.List[t.Optional[exp.Expression]] = [
@@ -4181,15 +4195,28 @@ class Parser(metaclass=_Parser):
return None
return self.expression(exp.JSONKeyValue, this=key, expression=value)
+ def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ if not this or not self._match_text_seq("FORMAT", "JSON"):
+ return this
+
+ return self.expression(exp.FormatJson, this=this)
+
+ def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
+ # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
+ for value in values:
+ if self._match_text_seq(value, "ON", on):
+ return f"{value} ON {on}"
+
+ return None
+
def _parse_json_object(self) -> exp.JSONObject:
star = self._parse_star()
- expressions = [star] if star else self._parse_csv(self._parse_json_key_value)
-
- null_handling = None
- if self._match_text_seq("NULL", "ON", "NULL"):
- null_handling = "NULL ON NULL"
- elif self._match_text_seq("ABSENT", "ON", "NULL"):
- null_handling = "ABSENT ON NULL"
+ expressions = (
+ [star]
+ if star
+ else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
+ )
+ null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")
unique_keys = None
if self._match_text_seq("WITH", "UNIQUE"):
@@ -4199,8 +4226,9 @@ class Parser(metaclass=_Parser):
self._match_text_seq("KEYS")
- return_type = self._match_text_seq("RETURNING") and self._parse_type()
- format_json = self._match_text_seq("FORMAT", "JSON")
+ return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
+ self._parse_type()
+ )
encoding = self._match_text_seq("ENCODING") and self._parse_var()
return self.expression(
@@ -4209,7 +4237,6 @@ class Parser(metaclass=_Parser):
null_handling=null_handling,
unique_keys=unique_keys,
return_type=return_type,
- format_json=format_json,
encoding=encoding,
)
@@ -4979,9 +5006,12 @@ class Parser(metaclass=_Parser):
self._match_r_paren()
return self.expression(exp.DictRange, this=this, min=min, max=max)
- def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
+ def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
+ index = self._index
expression = self._parse_column()
- self._match(TokenType.IN)
+ if not self._match(TokenType.IN):
+ self._retreat(index - 1)
+ return None
iterator = self._parse_column()
condition = self._parse_conjunction() if self._match_text_seq("IF") else None
return self.expression(
@@ -5125,3 +5155,10 @@ class Parser(metaclass=_Parser):
else:
column.replace(dot_or_id)
return node
+
+ def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
+ return [
+ exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
+ for value in values
+ if value
+ ]
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 3ba8195..ce255c3 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -108,6 +108,8 @@ class TokenType(AutoName):
LONGTEXT = auto()
MEDIUMBLOB = auto()
LONGBLOB = auto()
+ TINYBLOB = auto()
+ TINYTEXT = auto()
BINARY = auto()
VARBINARY = auto()
JSON = auto()
@@ -675,6 +677,7 @@ class Tokenizer(metaclass=_Tokenizer):
"BOOL": TokenType.BOOLEAN,
"BOOLEAN": TokenType.BOOLEAN,
"BYTE": TokenType.TINYINT,
+ "MEDIUMINT": TokenType.MEDIUMINT,
"TINYINT": TokenType.TINYINT,
"SHORT": TokenType.SMALLINT,
"SMALLINT": TokenType.SMALLINT,
@@ -712,10 +715,16 @@ class Tokenizer(metaclass=_Tokenizer):
"STR": TokenType.TEXT,
"STRING": TokenType.TEXT,
"TEXT": TokenType.TEXT,
+ "LONGTEXT": TokenType.LONGTEXT,
+ "MEDIUMTEXT": TokenType.MEDIUMTEXT,
+ "TINYTEXT": TokenType.TINYTEXT,
"CLOB": TokenType.TEXT,
"LONGVARCHAR": TokenType.TEXT,
"BINARY": TokenType.BINARY,
"BLOB": TokenType.VARBINARY,
+ "LONGBLOB": TokenType.LONGBLOB,
+ "MEDIUMBLOB": TokenType.MEDIUMBLOB,
+ "TINYBLOB": TokenType.TINYBLOB,
"BYTEA": TokenType.VARBINARY,
"VARBINARY": TokenType.VARBINARY,
"TIME": TokenType.TIME,
@@ -1159,7 +1168,11 @@ class Tokenizer(metaclass=_Tokenizer):
escapes = self._STRING_ESCAPES if escapes is None else escapes
while True:
- if self._char in escapes and (self._peek == delimiter or self._peek in escapes):
+ if (
+ self._char in escapes
+ and (self._peek == delimiter or self._peek in escapes)
+ and (self._char not in self._QUOTES or self._char == self._peek)
+ ):
if self._peek == delimiter:
text += self._peek
else:
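Reviewer note: the extra condition makes a quote character act as an escape only when it doubles itself, so backslash escapes keep working while a lone quote inside a differently-quoted string is left alone. A sketch on MySQL, where both ' and " delimit strings:

    import sqlglot

    # "" inside a double-quoted MySQL string collapses to one quote character,
    # while a lone " inside a single-quoted string is preserved as-is.
    print(sqlglot.transpile('SELECT "a""b"', read="mysql")[0])  # SELECT 'a"b'
    print(sqlglot.transpile("SELECT 'a\"b'", read="mysql")[0])  # SELECT 'a"b'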