Merging upstream version 18.2.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-09-07 11:39:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-09-07 11:39:48 +0000
commit: f73e9af131151f1e058446361c35b05c4c90bf10 (patch)
tree: ed425b89f12d3f5e4709290bdc03d876f365bc97 /tests/dataframe/unit
parent: Releasing debian version 17.12.0-1. (diff)
download: sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.tar.xz sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.zip
5 files changed, 110 insertions, 36 deletions
diff --git a/tests/dataframe/unit/dataframe_sql_validator.py b/tests/dataframe/unit/dataframe_sql_validator.py
index 2dcdb39..4363b0d 100644
--- a/tests/dataframe/unit/dataframe_sql_validator.py
+++ b/tests/dataframe/unit/dataframe_sql_validator.py
@@ -1,14 +1,11 @@
-import typing as t
-import unittest
-
 from sqlglot.dataframe.sql import types
-from sqlglot.dataframe.sql.dataframe import DataFrame
 from sqlglot.dataframe.sql.session import SparkSession
-from sqlglot.helper import ensure_list
+from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
 
 
-class DataFrameSQLValidator(unittest.TestCase):
+class DataFrameSQLValidator(DataFrameTestBase):
     def setUp(self) -> None:
+        super().setUp()
         self.spark = SparkSession()
         self.employee_schema = types.StructType(
             [
@@ -29,12 +26,3 @@ class DataFrameSQLValidator(unittest.TestCase):
         self.df_employee = self.spark.createDataFrame(
             data=employee_data, schema=self.employee_schema
         )
-
-    def compare_sql(
-        self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False
-    ):
-        actual_sqls = df.sql(pretty=pretty)
-        expected_statements = ensure_list(expected_statements)
-        self.assertEqual(len(expected_statements), len(actual_sqls))
-        for expected, actual in zip(expected_statements, actual_sqls):
-            self.assertEqual(expected, actual)
diff --git a/tests/dataframe/unit/dataframe_test_base.py b/tests/dataframe/unit/dataframe_test_base.py
new file mode 100644
index 0000000..6b07df9
--- /dev/null
+++ b/tests/dataframe/unit/dataframe_test_base.py
@@ -0,0 +1,23 @@
+import typing as t
+import unittest
+
+import sqlglot
+from sqlglot import MappingSchema
+from sqlglot.dataframe.sql import SparkSession
+from sqlglot.dataframe.sql.dataframe import DataFrame
+from sqlglot.helper import ensure_list
+
+
+class DataFrameTestBase(unittest.TestCase):
+    def setUp(self) -> None:
+        sqlglot.schema = MappingSchema()
+        SparkSession._instance = None
+
+    def compare_sql(
+        self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False
+    ):
+        actual_sqls = df.sql(pretty=pretty)
+        expected_statements = ensure_list(expected_statements)
+        self.assertEqual(len(expected_statements), len(actual_sqls))
+        for expected, actual in zip(expected_statements, actual_sqls):
+            self.assertEqual(expected, actual)
diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py
index 4c275e9..9758033 100644
--- a/tests/dataframe/unit/test_session.py
+++ b/tests/dataframe/unit/test_session.py
@@ -1,9 +1,6 @@
-from unittest import mock
-
 import sqlglot
 from sqlglot.dataframe.sql import functions as F, types
 from sqlglot.dataframe.sql.session import SparkSession
-from sqlglot.schema import MappingSchema
 from tests.dataframe.unit.dataframe_sql_validator import DataFrameSQLValidator
 
 
@@ -68,7 +65,6 @@ class TestDataframeSession(DataFrameSQLValidator):
 
         self.compare_sql(df, expected)
 
-    @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_select_only(self):
         query = "SELECT cola, colb FROM table"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
@@ -78,16 +74,6 @@ class TestDataframeSession(DataFrameSQLValidator):
             df.sql(pretty=False)[0],
         )
 
-    @mock.patch("sqlglot.schema", MappingSchema())
-    def test_select_quoted(self):
-        sqlglot.schema.add_table("`TEST`", {"name": "string"}, dialect="spark")
-
-        self.assertEqual(
-            SparkSession().table("`TEST`").select(F.col("name")).sql(dialect="snowflake")[0],
-            '''SELECT "test"."name" AS "name" FROM "test" AS "test"''',
-        )
-
-    @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_with_aggs(self):
         query = "SELECT cola, colb FROM table"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
@@ -97,7 +83,6 @@ class TestDataframeSession(DataFrameSQLValidator):
             df.sql(pretty=False, optimize=False)[0],
         )
 
-    @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_create(self):
         query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
@@ -105,7 +90,6 @@ class TestDataframeSession(DataFrameSQLValidator):
         expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
         self.compare_sql(df, expected)
 
-    @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_insert(self):
         query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
@@ -114,5 +98,4 @@ class TestDataframeSession(DataFrameSQLValidator):
         self.compare_sql(df, expected)
 
     def test_session_create_builder_patterns(self):
-        spark = SparkSession()
-        self.assertEqual(spark.builder.appName("abc").getOrCreate(), spark)
+        self.assertEqual(SparkSession.builder.appName("abc").getOrCreate(), SparkSession())
diff --git a/tests/dataframe/unit/test_session_case_sensitivity.py b/tests/dataframe/unit/test_session_case_sensitivity.py
new file mode 100644
index 0000000..7e35289
--- /dev/null
+++ b/tests/dataframe/unit/test_session_case_sensitivity.py
@@ -0,0 +1,81 @@
+import sqlglot
+from sqlglot.dataframe.sql import functions as F
+from sqlglot.dataframe.sql.session import SparkSession
+from sqlglot.errors import OptimizeError
+from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
+
+
+class TestSessionCaseSensitivity(DataFrameTestBase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.spark = SparkSession.builder.config("sqlframe.dialect", "snowflake").getOrCreate()
+
+    tests = [
+        (
+            "All lower no intention of CS",
+            "test",
+            "test",
+            {"name": "VARCHAR"},
+            "name",
+            '''SELECT "TEST"."NAME" AS "NAME" FROM "TEST" AS "TEST"''',
+        ),
+        (
+            "Table has CS while column does not",
+            '"Test"',
+            '"Test"',
+            {"name": "VARCHAR"},
+            "name",
+            '''SELECT "TEST"."NAME" AS "NAME" FROM "Test" AS "TEST"''',
+        ),
+        (
+            "Column has CS while table does not",
+            "test",
+            "test",
+            {'"Name"': "VARCHAR"},
+            '"Name"',
+            '''SELECT "TEST"."Name" AS "Name" FROM "TEST" AS "TEST"''',
+        ),
+        (
+            "Both Table and column have CS",
+            '"Test"',
+            '"Test"',
+            {'"Name"': "VARCHAR"},
+            '"Name"',
+            '''SELECT "TEST"."Name" AS "Name" FROM "Test" AS "TEST"''',
+        ),
+        (
+            "Lowercase CS table and column",
+            '"test"',
+            '"test"',
+            {'"name"': "VARCHAR"},
+            '"name"',
+            '''SELECT "TEST"."name" AS "name" FROM "test" AS "TEST"''',
+        ),
+        (
+            "CS table and column and query table but no CS in query column",
+            '"test"',
+            '"test"',
+            {'"name"': "VARCHAR"},
+            "name",
+            OptimizeError(),
+        ),
+        (
+            "CS table and column and query column but no CS in query table",
+            '"test"',
+            "test",
+            {'"name"': "VARCHAR"},
+            '"name"',
+            OptimizeError(),
+        ),
+    ]
+
+    def test_basic_case_sensitivity(self):
+        for test_name, table_name, spark_table, schema, spark_column, expected in self.tests:
+            with self.subTest(test_name):
+                sqlglot.schema.add_table(table_name, schema, dialect=self.spark.dialect)
+                df = self.spark.table(spark_table).select(F.col(spark_column))
+                if isinstance(expected, OptimizeError):
+                    with self.assertRaises(OptimizeError):
+                        df.sql()
+                else:
+                    self.compare_sql(df, expected)
diff --git a/tests/dataframe/unit/test_window.py b/tests/dataframe/unit/test_window.py
index 45d736f..9c4c897 100644
--- a/tests/dataframe/unit/test_window.py
+++ b/tests/dataframe/unit/test_window.py
@@ -1,10 +1,9 @@
-import unittest
-
 from sqlglot.dataframe.sql import functions as F
 from sqlglot.dataframe.sql.window import Window, WindowSpec
+from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
 
 
-class TestDataframeWindow(unittest.TestCase):
+class TestDataframeWindow(DataFrameTestBase):
     def test_window_spec_partition_by(self):
         partition_by = WindowSpec().partitionBy(F.col("cola"), F.col("colb"))
         self.assertEqual("OVER (PARTITION BY cola, colb)", partition_by.sql())
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-09-07 11:39:48 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-09-07 11:39:48 +0000
commit	f73e9af131151f1e058446361c35b05c4c90bf10 (patch)
tree	ed425b89f12d3f5e4709290bdc03d876f365bc97 /tests/dataframe/unit
parent	Releasing debian version 17.12.0-1. (diff)
download	sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.tar.xz sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.zip