From f73e9af131151f1e058446361c35b05c4c90bf10 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 7 Sep 2023 13:39:48 +0200 Subject: Merging upstream version 18.2.0. Signed-off-by: Daniel Baumann --- tests/dataframe/unit/dataframe_sql_validator.py | 18 +---- tests/dataframe/unit/dataframe_test_base.py | 23 ++++++ tests/dataframe/unit/test_session.py | 19 +---- .../unit/test_session_case_sensitivity.py | 81 ++++++++++++++++++++++ tests/dataframe/unit/test_window.py | 5 +- 5 files changed, 110 insertions(+), 36 deletions(-) create mode 100644 tests/dataframe/unit/dataframe_test_base.py create mode 100644 tests/dataframe/unit/test_session_case_sensitivity.py (limited to 'tests/dataframe') diff --git a/tests/dataframe/unit/dataframe_sql_validator.py b/tests/dataframe/unit/dataframe_sql_validator.py index 2dcdb39..4363b0d 100644 --- a/tests/dataframe/unit/dataframe_sql_validator.py +++ b/tests/dataframe/unit/dataframe_sql_validator.py @@ -1,14 +1,11 @@ -import typing as t -import unittest - from sqlglot.dataframe.sql import types -from sqlglot.dataframe.sql.dataframe import DataFrame from sqlglot.dataframe.sql.session import SparkSession -from sqlglot.helper import ensure_list +from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase -class DataFrameSQLValidator(unittest.TestCase): +class DataFrameSQLValidator(DataFrameTestBase): def setUp(self) -> None: + super().setUp() self.spark = SparkSession() self.employee_schema = types.StructType( [ @@ -29,12 +26,3 @@ class DataFrameSQLValidator(unittest.TestCase): self.df_employee = self.spark.createDataFrame( data=employee_data, schema=self.employee_schema ) - - def compare_sql( - self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False - ): - actual_sqls = df.sql(pretty=pretty) - expected_statements = ensure_list(expected_statements) - self.assertEqual(len(expected_statements), len(actual_sqls)) - for expected, actual in zip(expected_statements, actual_sqls): - self.assertEqual(expected, actual) diff --git a/tests/dataframe/unit/dataframe_test_base.py b/tests/dataframe/unit/dataframe_test_base.py new file mode 100644 index 0000000..6b07df9 --- /dev/null +++ b/tests/dataframe/unit/dataframe_test_base.py @@ -0,0 +1,23 @@ +import typing as t +import unittest + +import sqlglot +from sqlglot import MappingSchema +from sqlglot.dataframe.sql import SparkSession +from sqlglot.dataframe.sql.dataframe import DataFrame +from sqlglot.helper import ensure_list + + +class DataFrameTestBase(unittest.TestCase): + def setUp(self) -> None: + sqlglot.schema = MappingSchema() + SparkSession._instance = None + + def compare_sql( + self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False + ): + actual_sqls = df.sql(pretty=pretty) + expected_statements = ensure_list(expected_statements) + self.assertEqual(len(expected_statements), len(actual_sqls)) + for expected, actual in zip(expected_statements, actual_sqls): + self.assertEqual(expected, actual) diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py index 4c275e9..9758033 100644 --- a/tests/dataframe/unit/test_session.py +++ b/tests/dataframe/unit/test_session.py @@ -1,9 +1,6 @@ -from unittest import mock - import sqlglot from sqlglot.dataframe.sql import functions as F, types from sqlglot.dataframe.sql.session import SparkSession -from sqlglot.schema import MappingSchema from tests.dataframe.unit.dataframe_sql_validator import DataFrameSQLValidator @@ -68,7 +65,6 @@ class TestDataframeSession(DataFrameSQLValidator): self.compare_sql(df, expected) - @mock.patch("sqlglot.schema", MappingSchema()) def test_sql_select_only(self): query = "SELECT cola, colb FROM table" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") @@ -78,16 +74,6 @@ class TestDataframeSession(DataFrameSQLValidator): df.sql(pretty=False)[0], ) - @mock.patch("sqlglot.schema", MappingSchema()) - def test_select_quoted(self): - sqlglot.schema.add_table("`TEST`", {"name": "string"}, dialect="spark") - - self.assertEqual( - SparkSession().table("`TEST`").select(F.col("name")).sql(dialect="snowflake")[0], - '''SELECT "test"."name" AS "name" FROM "test" AS "test"''', - ) - - @mock.patch("sqlglot.schema", MappingSchema()) def test_sql_with_aggs(self): query = "SELECT cola, colb FROM table" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") @@ -97,7 +83,6 @@ class TestDataframeSession(DataFrameSQLValidator): df.sql(pretty=False, optimize=False)[0], ) - @mock.patch("sqlglot.schema", MappingSchema()) def test_sql_create(self): query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") @@ -105,7 +90,6 @@ class TestDataframeSession(DataFrameSQLValidator): expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`" self.compare_sql(df, expected) - @mock.patch("sqlglot.schema", MappingSchema()) def test_sql_insert(self): query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1" sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark") @@ -114,5 +98,4 @@ class TestDataframeSession(DataFrameSQLValidator): self.compare_sql(df, expected) def test_session_create_builder_patterns(self): - spark = SparkSession() - self.assertEqual(spark.builder.appName("abc").getOrCreate(), spark) + self.assertEqual(SparkSession.builder.appName("abc").getOrCreate(), SparkSession()) diff --git a/tests/dataframe/unit/test_session_case_sensitivity.py b/tests/dataframe/unit/test_session_case_sensitivity.py new file mode 100644 index 0000000..7e35289 --- /dev/null +++ b/tests/dataframe/unit/test_session_case_sensitivity.py @@ -0,0 +1,81 @@ +import sqlglot +from sqlglot.dataframe.sql import functions as F +from sqlglot.dataframe.sql.session import SparkSession +from sqlglot.errors import OptimizeError +from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase + + +class TestSessionCaseSensitivity(DataFrameTestBase): + def setUp(self) -> None: + super().setUp() + self.spark = SparkSession.builder.config("sqlframe.dialect", "snowflake").getOrCreate() + + tests = [ + ( + "All lower no intention of CS", + "test", + "test", + {"name": "VARCHAR"}, + "name", + '''SELECT "TEST"."NAME" AS "NAME" FROM "TEST" AS "TEST"''', + ), + ( + "Table has CS while column does not", + '"Test"', + '"Test"', + {"name": "VARCHAR"}, + "name", + '''SELECT "TEST"."NAME" AS "NAME" FROM "Test" AS "TEST"''', + ), + ( + "Column has CS while table does not", + "test", + "test", + {'"Name"': "VARCHAR"}, + '"Name"', + '''SELECT "TEST"."Name" AS "Name" FROM "TEST" AS "TEST"''', + ), + ( + "Both Table and column have CS", + '"Test"', + '"Test"', + {'"Name"': "VARCHAR"}, + '"Name"', + '''SELECT "TEST"."Name" AS "Name" FROM "Test" AS "TEST"''', + ), + ( + "Lowercase CS table and column", + '"test"', + '"test"', + {'"name"': "VARCHAR"}, + '"name"', + '''SELECT "TEST"."name" AS "name" FROM "test" AS "TEST"''', + ), + ( + "CS table and column and query table but no CS in query column", + '"test"', + '"test"', + {'"name"': "VARCHAR"}, + "name", + OptimizeError(), + ), + ( + "CS table and column and query column but no CS in query table", + '"test"', + "test", + {'"name"': "VARCHAR"}, + '"name"', + OptimizeError(), + ), + ] + + def test_basic_case_sensitivity(self): + for test_name, table_name, spark_table, schema, spark_column, expected in self.tests: + with self.subTest(test_name): + sqlglot.schema.add_table(table_name, schema, dialect=self.spark.dialect) + df = self.spark.table(spark_table).select(F.col(spark_column)) + if isinstance(expected, OptimizeError): + with self.assertRaises(OptimizeError): + df.sql() + else: + self.compare_sql(df, expected) diff --git a/tests/dataframe/unit/test_window.py b/tests/dataframe/unit/test_window.py index 45d736f..9c4c897 100644 --- a/tests/dataframe/unit/test_window.py +++ b/tests/dataframe/unit/test_window.py @@ -1,10 +1,9 @@ -import unittest - from sqlglot.dataframe.sql import functions as F from sqlglot.dataframe.sql.window import Window, WindowSpec +from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase -class TestDataframeWindow(unittest.TestCase): +class TestDataframeWindow(DataFrameTestBase): def test_window_spec_partition_by(self): partition_by = WindowSpec().partitionBy(F.col("cola"), F.col("colb")) self.assertEqual("OVER (PARTITION BY cola, colb)", partition_by.sql()) -- cgit v1.2.3