Merging upstream version 21.1.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-02-16 05:45:52 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-02-16 05:45:52 +0000
commit: 3d48060515ba25b4c49d975a520ee0682327d1b7 (patch)
tree: e8730f509026e866d77c459f74a384505425363a /sqlglot/transforms.py
parent: Releasing debian version 21.0.2-1. (diff)
download: sqlglot-3d48060515ba25b4c49d975a520ee0682327d1b7.tar.xz
sqlglot-3d48060515ba25b4c49d975a520ee0682327d1b7.zip
1 files changed, 94 insertions, 0 deletions
diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py
index f13569f..4777609 100644
--- a/sqlglot/transforms.py
+++ b/sqlglot/transforms.py
@@ -213,6 +213,19 @@ def explode_to_unnest(index_offset: int = 0) -> t.Callable[[exp.Expression], exp
                     is_posexplode = isinstance(explode, exp.Posexplode)
                     explode_arg = explode.this
 
+                    if isinstance(explode, exp.ExplodeOuter):
+                        bracket = explode_arg[0]
+                        bracket.set("safe", True)
+                        bracket.set("offset", True)
+                        explode_arg = exp.func(
+                            "IF",
+                            exp.func(
+                                "ARRAY_SIZE", exp.func("COALESCE", explode_arg, exp.Array())
+                            ).eq(0),
+                            exp.array(bracket, copy=False),
+                            explode_arg,
+                        )
+
                     # This ensures that we won't use [POS]EXPLODE's argument as a new selection
                     if isinstance(explode_arg, exp.Column):
                         taken_select_names.add(explode_arg.output_name)
@@ -466,6 +479,87 @@ def unqualify_columns(expression: exp.Expression) -> exp.Expression:
     return expression
 
 
+def remove_unique_constraints(expression: exp.Expression) -> exp.Expression:
+    assert isinstance(expression, exp.Create)
+    for constraint in expression.find_all(exp.UniqueColumnConstraint):
+        if constraint.parent:
+            constraint.parent.pop()
+
+    return expression
+
+
+def ctas_with_tmp_tables_to_create_tmp_view(
+    expression: exp.Expression,
+    tmp_storage_provider: t.Callable[[exp.Expression], exp.Expression] = lambda e: e,
+) -> exp.Expression:
+    assert isinstance(expression, exp.Create)
+    properties = expression.args.get("properties")
+    temporary = any(
+        isinstance(prop, exp.TemporaryProperty)
+        for prop in (properties.expressions if properties else [])
+    )
+
+    # CTAS with temp tables map to CREATE TEMPORARY VIEW
+    if expression.kind == "TABLE" and temporary:
+        if expression.expression:
+            return exp.Create(
+                kind="TEMPORARY VIEW",
+                this=expression.this,
+                expression=expression.expression,
+            )
+        return tmp_storage_provider(expression)
+
+    return expression
+
+
+def move_schema_columns_to_partitioned_by(expression: exp.Expression) -> exp.Expression:
+    """
+    In Hive, the PARTITIONED BY property acts as an extension of a table's schema. When the
+    PARTITIONED BY value is an array of column names, they are transformed into a schema.
+    The corresponding columns are removed from the create statement.
+    """
+    assert isinstance(expression, exp.Create)
+    has_schema = isinstance(expression.this, exp.Schema)
+    is_partitionable = expression.kind in {"TABLE", "VIEW"}
+
+    if has_schema and is_partitionable:
+        prop = expression.find(exp.PartitionedByProperty)
+        if prop and prop.this and not isinstance(prop.this, exp.Schema):
+            schema = expression.this
+            columns = {v.name.upper() for v in prop.this.expressions}
+            partitions = [col for col in schema.expressions if col.name.upper() in columns]
+            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
+            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
+            expression.set("this", schema)
+
+    return expression
+
+
+def move_partitioned_by_to_schema_columns(expression: exp.Expression) -> exp.Expression:
+    """
+    Spark 3 supports both "HIVEFORMAT" and "DATASOURCE" formats for CREATE TABLE.
+
+    Currently, SQLGlot uses the DATASOURCE format for Spark 3.
+    """
+    assert isinstance(expression, exp.Create)
+    prop = expression.find(exp.PartitionedByProperty)
+    if (
+        prop
+        and prop.this
+        and isinstance(prop.this, exp.Schema)
+        and all(isinstance(e, exp.ColumnDef) and e.args.get("kind") for e in prop.this.expressions)
+    ):
+        prop_this = exp.Tuple(
+            expressions=[exp.to_identifier(e.this) for e in prop.this.expressions]
+        )
+        schema = expression.this
+        for e in prop.this.expressions:
+            schema.append("expressions", e)
+        prop.set("this", prop_this)
+
+    return expression
+
+
 def preprocess(
     transforms: t.List[t.Callable[[exp.Expression], exp.Expression]],
 ) -> t.Callable[[Generator, exp.Expression], str]:
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-02-16 05:45:52 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-02-16 05:45:52 +0000
commit	3d48060515ba25b4c49d975a520ee0682327d1b7 (patch)
tree	e8730f509026e866d77c459f74a384505425363a /sqlglot/transforms.py
parent	Releasing debian version 21.0.2-1. (diff)
download	sqlglot-3d48060515ba25b4c49d975a520ee0682327d1b7.tar.xz sqlglot-3d48060515ba25b4c49d975a520ee0682327d1b7.zip