Merging upstream version 16.7.3.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-29 13:02:29 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-29 13:02:29 +0000
commit: 9b39dac84e82bf473216939e50b8836170f01d23 (patch)
tree: 9b405bc86ef7e2ea28cddc6b787ed70355cf7fce /sqlglot/dialects
parent: Releasing debian version 16.4.2-1. (diff)
download: sqlglot-9b39dac84e82bf473216939e50b8836170f01d23.tar.xz
sqlglot-9b39dac84e82bf473216939e50b8836170f01d23.zip
11 files changed, 269 insertions, 28 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 52d4a88..8786063 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import re
 import typing as t
 
@@ -21,6 +22,8 @@ from sqlglot.dialects.dialect import (
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
 
+logger = logging.getLogger("sqlglot")
+
 
 def _date_add_sql(
     data_type: str, kind: str
@@ -104,12 +107,70 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
     return expression
 
 
+# https://issuetracker.google.com/issues/162294746
+# workaround for bigquery bug when grouping by an expression and then ordering
+# WITH x AS (SELECT 1 y)
+# SELECT y + 1 z
+# FROM x
+# GROUP BY y + 1
+# ORDER BY z
+def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
+    """Swap GROUP BY terms for their SELECT alias when the query is also ordered."""
+    if not isinstance(expression, exp.Select):
+        return expression
+
+    group_by = expression.args.get("group")
+    if not (group_by and expression.args.get("order")):
+        return expression
+
+    alias_by_projection = {
+        projection.this: projection.args["alias"]
+        for projection in expression.selects
+        if isinstance(projection, exp.Alias)
+    }
+    for term in group_by.expressions:
+        alias = alias_by_projection.get(term)
+        if alias:
+            term.replace(exp.column(alias))
+    return expression
+
+
+def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
+    """Push CTE column names down into the CTE query, as BigQuery rejects them on the CTE."""
+    if not isinstance(expression, exp.CTE):
+        return expression
+
+    column_names = expression.alias_column_names
+    if not column_names:
+        return expression
+
+    query = expression.this
+    if query.is_star:
+        logger.warning(
+            "Can't push down CTE column names for star queries. Run the query through"
+            " the optimizer or use 'qualify' to expand the star projections first."
+        )
+        return expression
+
+    expression.args["alias"].set("columns", None)
+
+    for name, projection in zip(column_names, query.selects):
+        # Inner aliases are shadowed by the CTE column names
+        inner = projection.this if isinstance(projection, exp.Alias) else projection
+        projection.replace(exp.alias_(inner, name))
+
+    return expression
+
+
 class BigQuery(Dialect):
     UNNEST_COLUMN_ONLY = True
 
     # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
     RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
 
+    # bigquery udfs are case sensitive
+    NORMALIZE_FUNCTIONS = False
+
     TIME_MAPPING = {
         "%D": "%m/%d/%y",
     }
@@ -135,12 +196,16 @@ class BigQuery(Dialect):
         # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
         # The following check is essentially a heuristic to detect tables based on whether or
         # not they're qualified.
-        if (
-            isinstance(expression, exp.Identifier)
-            and not (isinstance(expression.parent, exp.Table) and expression.parent.db)
-            and not expression.meta.get("is_table")
-        ):
-            expression.set("this", expression.this.lower())
+        if isinstance(expression, exp.Identifier):
+            parent = expression.parent
+
+            while isinstance(parent, exp.Dot):
+                parent = parent.parent
+
+            if not (isinstance(parent, exp.Table) and parent.db) and not expression.meta.get(
+                "is_table"
+            ):
+                expression.set("this", expression.this.lower())
 
         return expression
 
@@ -298,10 +363,8 @@ class BigQuery(Dialect):
             **generator.Generator.TRANSFORMS,
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArraySize: rename_func("ARRAY_LENGTH"),
-            exp.AtTimeZone: lambda self, e: self.func(
-                "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
-            ),
             exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
+            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
             exp.DateAdd: _date_add_sql("DATE", "ADD"),
             exp.DateSub: _date_add_sql("DATE", "SUB"),
             exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
@@ -325,7 +388,12 @@ class BigQuery(Dialect):
             ),
             exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
             exp.Select: transforms.preprocess(
-                [_unqualify_unnest, transforms.eliminate_distinct_on]
+                [
+                    transforms.explode_to_unnest,
+                    _unqualify_unnest,
+                    transforms.eliminate_distinct_on,
+                    _alias_ordered_group,
+                ]
             ),
             exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
             exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
@@ -334,7 +402,6 @@ class BigQuery(Dialect):
             exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
             exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
             exp.TimeStrToTime: timestrtotime_sql,
-            exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})",
             exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
             exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
             exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
@@ -378,7 +445,121 @@ class BigQuery(Dialect):
             exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
         }
 
-        RESERVED_KEYWORDS = {*generator.Generator.RESERVED_KEYWORDS, "hash"}
+        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
+        RESERVED_KEYWORDS = {
+            *generator.Generator.RESERVED_KEYWORDS,
+            "all",
+            "and",
+            "any",
+            "array",
+            "as",
+            "asc",
+            "assert_rows_modified",
+            "at",
+            "between",
+            "by",
+            "case",
+            "cast",
+            "collate",
+            "contains",
+            "create",
+            "cross",
+            "cube",
+            "current",
+            "default",
+            "define",
+            "desc",
+            "distinct",
+            "else",
+            "end",
+            "enum",
+            "escape",
+            "except",
+            "exclude",
+            "exists",
+            "extract",
+            "false",
+            "fetch",
+            "following",
+            "for",
+            "from",
+            "full",
+            "group",
+            "grouping",
+            "groups",
+            "hash",
+            "having",
+            "if",
+            "ignore",
+            "in",
+            "inner",
+            "intersect",
+            "interval",
+            "into",
+            "is",
+            "join",
+            "lateral",
+            "left",
+            "like",
+            "limit",
+            "lookup",
+            "merge",
+            "natural",
+            "new",
+            "no",
+            "not",
+            "null",
+            "nulls",
+            "of",
+            "on",
+            "or",
+            "order",
+            "outer",
+            "over",
+            "partition",
+            "preceding",
+            "proto",
+            "qualify",
+            "range",
+            "recursive",
+            "respect",
+            "right",
+            "rollup",
+            "rows",
+            "select",
+            "set",
+            "some",
+            "struct",
+            "tablesample",
+            "then",
+            "to",
+            "treat",
+            "true",
+            "unbounded",
+            "union",
+            "unnest",
+            "using",
+            "when",
+            "where",
+            "window",
+            "with",
+            "within",
+        }
+
+        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
+            # Directly under a CAST the parent's rendering is used unchanged.
+            if isinstance(expression.parent, exp.Cast):
+                return super().attimezone_sql(expression)
+            datetime_sql = self.func("DATETIME", expression.this, expression.args.get("zone"))
+            return self.func("TIMESTAMP", datetime_sql)
+
+        def trycast_sql(self, expression: exp.TryCast) -> str:
+            return self.cast_sql(expression, safe_prefix="SAFE_")  # presumably renders SAFE_CAST
+
+        def cte_sql(self, expression: exp.CTE) -> str:
+            if expression.alias_column_names:  # the CTE alias still declares column names
+                self.unsupported("Column names in CTE definition are not supported.")
+            return super().cte_sql(expression)  # the CTE itself is still generated by the parent
 
         def array_sql(self, expression: exp.Array) -> str:
             first_arg = seq_get(expression.expressions, 0)
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index 0e25b9b..d258826 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -388,6 +388,11 @@ def no_comment_column_constraint_sql(
     return ""
 
 
+def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
+    self.unsupported("MAP_FROM_ENTRIES unsupported")  # reported through the generator
+    return ""  # no SQL is produced for this expression
+
+
 def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
     this = self.sql(expression, "this")
     substr = self.sql(expression, "substr")
diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py
index 1dd2096..5f743ee 100644
--- a/sqlglot/dialects/mysql.py
+++ b/sqlglot/dialects/mysql.py
@@ -132,6 +132,10 @@ class MySQL(Dialect):
             "SEPARATOR": TokenType.SEPARATOR,
             "ENUM": TokenType.ENUM,
             "START": TokenType.BEGIN,
+            "SIGNED": TokenType.BIGINT,
+            "SIGNED INTEGER": TokenType.BIGINT,
+            "UNSIGNED": TokenType.UBIGINT,
+            "UNSIGNED INTEGER": TokenType.UBIGINT,
             "_ARMSCII8": TokenType.INTRODUCER,
             "_ASCII": TokenType.INTRODUCER,
             "_BIG5": TokenType.INTRODUCER,
@@ -441,6 +445,17 @@ class MySQL(Dialect):
 
         LIMIT_FETCH = "LIMIT"
 
+        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
+            """(U)BIGINT is not allowed in a CAST expression, so we use (UN)SIGNED instead."""
+            if expression.to.this == exp.DataType.Type.BIGINT:
+                to = "SIGNED"
+            elif expression.to.this == exp.DataType.Type.UBIGINT:
+                to = "UNSIGNED"
+            else:
+                # Every other target type is rendered by the parent; keep the caller's prefix.
+                return super().cast_sql(expression, safe_prefix=safe_prefix)
+            return f"{safe_prefix or ''}CAST({self.sql(expression, 'this')} AS {to})"
+
         def show_sql(self, expression: exp.Show) -> str:
             this = f" {expression.name}"
             full = " FULL" if expression.args.get("full") else ""
diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py
index 8c2a4ab..766b584 100644
--- a/sqlglot/dialects/postgres.py
+++ b/sqlglot/dialects/postgres.py
@@ -11,6 +11,7 @@ from sqlglot.dialects.dialect import (
     format_time_lambda,
     max_or_greatest,
     min_or_least,
+    no_map_from_entries_sql,
     no_paren_current_date_sql,
     no_pivot_sql,
     no_tablesample_sql,
@@ -346,6 +347,7 @@ class Postgres(Dialect):
             exp.LogicalOr: rename_func("BOOL_OR"),
             exp.LogicalAnd: rename_func("BOOL_AND"),
             exp.Max: max_or_greatest,
+            exp.MapFromEntries: no_map_from_entries_sql,
             exp.Min: min_or_least,
             exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
             exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
@@ -378,3 +380,11 @@ class Postgres(Dialect):
             exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
             exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
         }
+
+        def bracket_sql(self, expression: exp.Bracket) -> str:
+            """Parenthesize an ARRAY operand, since forms like ARRAY[1, 2, 3][3] aren't allowed."""
+            if not isinstance(expression.this, exp.Array):
+                return super().bracket_sql(expression)
+            wrapped = expression.copy()  # work on a copy so the input tree is not modified
+            wrapped.set("this", exp.paren(wrapped.this, copy=False))
+            return super().bracket_sql(wrapped)
diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py
index 265780e..24c439b 100644
--- a/sqlglot/dialects/presto.py
+++ b/sqlglot/dialects/presto.py
@@ -20,7 +20,7 @@ from sqlglot.dialects.dialect import (
 )
 from sqlglot.dialects.mysql import MySQL
 from sqlglot.errors import UnsupportedError
-from sqlglot.helper import seq_get
+from sqlglot.helper import apply_index_offset, seq_get
 from sqlglot.tokens import TokenType
 
 
@@ -154,6 +154,13 @@ def _from_unixtime(args: t.List) -> exp.Expression:
     return exp.UnixToTime.from_arg_list(args)
 
 
+def _parse_element_at(args: t.List) -> exp.SafeBracket:
+    this, index = seq_get(args, 0), seq_get(args, 1)
+    assert isinstance(this, exp.Expression) and isinstance(index, exp.Expression)
+    # The index is passed through apply_index_offset with -1 before being stored.
+    return exp.SafeBracket(this=this, expressions=apply_index_offset(this, [index], -1))
+
+
 def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
     if isinstance(expression, exp.Table):
         if isinstance(expression.this, exp.GenerateSeries):
@@ -201,6 +208,7 @@ class Presto(Dialect):
             "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
             "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
             "DATE_TRUNC": date_trunc_to_time,
+            "ELEMENT_AT": _parse_element_at,
             "FROM_HEX": exp.Unhex.from_arg_list,
             "FROM_UNIXTIME": _from_unixtime,
             "FROM_UTF8": lambda args: exp.Decode(
@@ -285,6 +293,9 @@ class Presto(Dialect):
             exp.Pivot: no_pivot_sql,
             exp.Quantile: _quantile_sql,
             exp.Right: right_to_substring_sql,
+            exp.SafeBracket: lambda self, e: self.func(
+                "ELEMENT_AT", e.this, seq_get(apply_index_offset(e.this, e.expressions, 1), 0)
+            ),
             exp.SafeDivide: no_safe_divide_sql,
             exp.Schema: _schema_sql,
             exp.Select: transforms.preprocess(
diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py
index db6cc3f..87be42c 100644
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@@ -41,8 +41,6 @@ class Redshift(Postgres):
             "STRTOL": exp.FromBase.from_arg_list,
         }
 
-        CONVERT_TYPE_FIRST = True
-
         def _parse_types(
             self, check_func: bool = False, schema: bool = False
         ) -> t.Optional[exp.Expression]:
@@ -58,6 +56,12 @@ class Redshift(Postgres):
 
             return this
 
+        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
+            # The type is parsed first, then the value; `strict` is unused and a TryCast is built.
+            target_type = self._parse_types()
+            self._match(TokenType.COMMA)
+            return self.expression(exp.TryCast, this=self._parse_bitwise(), to=target_type)
+
     class Tokenizer(Postgres.Tokenizer):
         BIT_STRINGS = []
         HEX_STRINGS = []
diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
index 1f620df..a2dbfd9 100644
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -258,14 +258,29 @@ class Snowflake(Dialect):
 
         ALTER_PARSERS = {
             **parser.Parser.ALTER_PARSERS,
-            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
-            "SET": lambda self: self._parse_alter_table_set_tag(),
+            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
+            "UNSET": lambda self: self.expression(
+                exp.Set,
+                tag=self._match_text_seq("TAG"),
+                expressions=self._parse_csv(self._parse_id_var),
+                unset=True,
+            ),
         }
 
-        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
-            self._match_text_seq("TAG")
-            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
-            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)
+        def _parse_id_var(
+            self,
+            any_token: bool = True,
+            tokens: t.Optional[t.Collection[TokenType]] = None,
+        ) -> t.Optional[exp.Expression]:
+            """Parse an identifier, also accepting the IDENTIFIER(<name or string>) form."""
+            if not self._match_text_seq("IDENTIFIER", "("):
+                return super()._parse_id_var(any_token=any_token, tokens=tokens)
+
+            inner = super()._parse_id_var(any_token=any_token, tokens=tokens)
+            if not inner:
+                inner = self._parse_string()
+            self._match_r_paren()
+            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[inner])
 
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", "$$"]
@@ -380,10 +395,6 @@ class Snowflake(Dialect):
                 self.unsupported("INTERSECT with All is not supported in Snowflake")
             return super().intersect_op(expression)
 
-        def settag_sql(self, expression: exp.SetTag) -> str:
-            action = "UNSET" if expression.args.get("unset") else "SET"
-            return f"{action} TAG {self.expressions(expression)}"
-
         def describe_sql(self, expression: exp.Describe) -> str:
             # Default to table if kind is unknown
             kind_value = expression.args.get("kind") or "TABLE"
diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py
index b7d1641..7a7ee01 100644
--- a/sqlglot/dialects/spark.py
+++ b/sqlglot/dialects/spark.py
@@ -43,6 +43,7 @@ class Spark(Spark2):
     class Generator(Spark2.Generator):
         TRANSFORMS = Spark2.Generator.TRANSFORMS.copy()
         TRANSFORMS.pop(exp.DateDiff)
+        TRANSFORMS.pop(exp.Group)
 
         def datediff_sql(self, expression: exp.DateDiff) -> str:
             unit = self.sql(expression, "unit")
diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py
index ed6992d..3720b8d 100644
--- a/sqlglot/dialects/spark2.py
+++ b/sqlglot/dialects/spark2.py
@@ -231,14 +231,14 @@ class Spark2(Hive):
         WRAP_DERIVED_VALUES = False
         CREATE_FUNCTION_RETURN_AS = False
 
-        def cast_sql(self, expression: exp.Cast) -> str:
+        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
             if isinstance(expression.this, exp.Cast) and expression.this.is_type("json"):
                 schema = f"'{self.sql(expression, 'to')}'"
                 return self.func("FROM_JSON", expression.this.this, schema)
             if expression.is_type("json"):
                 return self.func("TO_JSON", expression.this)
 
-            return super(Hive.Generator, self).cast_sql(expression)
+            return super(Hive.Generator, self).cast_sql(expression, safe_prefix=safe_prefix)
 
         def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
             return super().columndef_sql(
diff --git a/sqlglot/dialects/sqlite.py b/sqlglot/dialects/sqlite.py
index 803f361..519e62a 100644
--- a/sqlglot/dialects/sqlite.py
+++ b/sqlglot/dialects/sqlite.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import typing as t
+
 from sqlglot import exp, generator, parser, tokens, transforms
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -133,7 +135,7 @@ class SQLite(Dialect):
 
         LIMIT_FETCH = "LIMIT"
 
-        def cast_sql(self, expression: exp.Cast) -> str:
+        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
             if expression.is_type("date"):
                 return self.func("DATE", expression.this)
 
diff --git a/sqlglot/dialects/tsql.py b/sqlglot/dialects/tsql.py
index 6d674f5..f671630 100644
--- a/sqlglot/dialects/tsql.py
+++ b/sqlglot/dialects/tsql.py
@@ -166,6 +166,7 @@ def _string_agg_sql(self: generator.Generator, expression: exp.GroupConcat) -> s
 
 
 class TSQL(Dialect):
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
     NULL_ORDERING = "nulls_are_small"
     TIME_FORMAT = "'yyyy-mm-dd hh:mm:ss'"
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-06-29 13:02:29 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-06-29 13:02:29 +0000
commit	9b39dac84e82bf473216939e50b8836170f01d23 (patch)
tree	9b405bc86ef7e2ea28cddc6b787ed70355cf7fce /sqlglot/dialects
parent	Releasing debian version 16.4.2-1. (diff)
download	sqlglot-9b39dac84e82bf473216939e50b8836170f01d23.tar.xz sqlglot-9b39dac84e82bf473216939e50b8836170f01d23.zip