summary | refs | log | tree | commit | diff | stats
path: root/tests/dataframe/integration/test_session.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 07:22:23 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 07:22:23 +0000
commit: 0d1477fdf20125df42fe49355b757625417c8f8c (patch)
tree: 0ace7a95d185b2b1ae36e25e341bf92cd9021cb0 /tests/dataframe/integration/test_session.py
parent: Releasing debian version 23.16.0-1. (diff)
download: sqlglot-0d1477fdf20125df42fe49355b757625417c8f8c.tar.xz
download: sqlglot-0d1477fdf20125df42fe49355b757625417c8f8c.zip
Merging upstream version 24.0.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/dataframe/integration/test_session.py')
-rw-r--r-- tests/dataframe/integration/test_session.py | 43
1 files changed, 0 insertions, 43 deletions
diff --git a/tests/dataframe/integration/test_session.py b/tests/dataframe/integration/test_session.py
deleted file mode 100644
index 3bb3e20..0000000
--- a/tests/dataframe/integration/test_session.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from pyspark.sql import functions as F
-
-from sqlglot.dataframe.sql import functions as SF
-from tests.dataframe.integration.dataframe_validator import DataFrameValidator
-
-
-class TestSessionFunc(DataFrameValidator):
- def test_sql_simple_select(self):
- query = "SELECT fname, lname FROM employee"
- df = self.spark.sql(query)
- dfs = self.sqlglot.sql(query)
- self.compare_spark_with_sqlglot(df, dfs)
-
- def test_sql_with_join(self):
- query = """
- SELECT
- e.employee_id
- , s.store_id
- FROM
- employee e
- INNER JOIN
- store s
- ON
- e.store_id = s.store_id
- """
- df = (
- self.spark.sql(query)
- .groupBy(F.col("store_id"))
- .agg(F.countDistinct(F.col("employee_id")))
- )
- dfs = (
- self.sqlglot.sql(query)
- .groupBy(SF.col("store_id"))
- .agg(SF.countDistinct(SF.col("employee_id")))
- )
- self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)
-
- def test_nameless_column(self):
- query = "SELECT MAX(age) FROM employee"
- df = self.spark.sql(query)
- dfs = self.sqlglot.sql(query)
- # Spark will alias the column to `max(age)` while sqlglot will alias to `_col_0` so their schemas will differ
- self.compare_spark_with_sqlglot(df, dfs, skip_schema_compare=True)