author      Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-23 07:22:20 +0000
committer   Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-23 07:22:20 +0000
commit      41e67f6ce6b4b732d02e421d6825c18b8d15a59d (patch)
tree        30fb0000d3e6ff11b366567bc35564842e7dbb50 /tests/dataframe/integration/test_session.py
parent      Adding upstream version 23.16.0. (diff)
download    sqlglot-41e67f6ce6b4b732d02e421d6825c18b8d15a59d.tar.xz
            sqlglot-41e67f6ce6b4b732d02e421d6825c18b8d15a59d.zip
Adding upstream version 24.0.0. (upstream/24.0.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/dataframe/integration/test_session.py')
-rw-r--r--   tests/dataframe/integration/test_session.py   43
1 file changed, 0 insertions, 43 deletions
diff --git a/tests/dataframe/integration/test_session.py b/tests/dataframe/integration/test_session.py
deleted file mode 100644
index 3bb3e20..0000000
--- a/tests/dataframe/integration/test_session.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from pyspark.sql import functions as F
-
-from sqlglot.dataframe.sql import functions as SF
-from tests.dataframe.integration.dataframe_validator import DataFrameValidator
-
-
-class TestSessionFunc(DataFrameValidator):
-    def test_sql_simple_select(self):
-        query = "SELECT fname, lname FROM employee"
-        df = self.spark.sql(query)
-        dfs = self.sqlglot.sql(query)
-        self.compare_spark_with_sqlglot(df, dfs)
-
-    def test_sql_with_join(self):
-        query = """
-        SELECT
-            e.employee_id
-            , s.store_id
-        FROM
-            employee e
-            INNER JOIN
-            store s
-            ON
-                e.store_id = s.store_id
-        """
-        df = (
-            self.spark.sql(query)
-            .groupBy(F.col("store_id"))
-            .agg(F.countDistinct(F.col("employee_id")))
-        )
-        dfs = (
-            self.sqlglot.sql(query)
-            .groupBy(SF.col("store_id"))
-            .agg(SF.countDistinct(SF.col("employee_id")))
-        )
-        self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)
-
-    def test_nameless_column(self):
-        query = "SELECT MAX(age) FROM employee"
-        df = self.spark.sql(query)
-        dfs = self.sqlglot.sql(query)
-        # Spark will alias the column to `max(age)` while sqlglot will alias to `_col_0` so their schemas will differ
-        self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)
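
Context for reviewers (not part of the commit): the deleted tests exercised sqlglot's PySpark DataFrame shim, which builds Spark SQL from DataFrame-style calls instead of executing them. A minimal sketch of that pattern against a sqlglot release prior to 24.0.0 (where sqlglot.dataframe still ships) follows; the schema registration step and the exact return shape of sql() are assumptions based on the older API, not something introduced by this diff.

    # Sketch only -- assumes sqlglot < 24.0.0, where the dataframe module still exists.
    import sqlglot
    from sqlglot.dataframe.sql import functions as SF
    from sqlglot.dataframe.sql.session import SparkSession

    # Register table schemas so columns can be resolved without a live Spark
    # cluster (assumed older-API helper).
    sqlglot.schema.add_table("employee", {"employee_id": "INT", "store_id": "INT"})
    sqlglot.schema.add_table("store", {"store_id": "INT"})

    spark = SparkSession()
    dfs = (
        spark.sql(
            "SELECT e.employee_id, s.store_id "
            "FROM employee e INNER JOIN store s ON e.store_id = s.store_id"
        )
        .groupBy(SF.col("store_id"))
        .agg(SF.countDistinct(SF.col("employee_id")))
    )

    # dfs.sql() returns the generated SQL statement(s) rather than running them.
    for statement in dfs.sql(dialect="spark"):
        print(statement)

The integration tests above ran the same chain through a real PySpark session and through this shim, then compared the results; the removal here is consistent with the dataframe module leaving the sqlglot tree in 24.0.0.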