summaryrefslogtreecommitdiffstats
path: root/tests/dataframe/integration/test_session.py
blob: ff1477b3f0bf6b5ab2c86a5f55f3cef779de9a82 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from pyspark.sql import functions as F

from sqlglot.dataframe.sql import functions as SF
from tests.dataframe.integration.dataframe_validator import DataFrameValidator


class TestSessionFunc(DataFrameValidator):
    """Tests for the session-level ``sql`` entry point.

    Every test sends the same SQL text to ``self.spark`` and to
    ``self.sqlglot`` and hands both results to
    ``compare_spark_with_sqlglot``.
    """

    def test_sql_simple_select(self):
        # Plain two-column projection; the resulting frames are compared as-is.
        sql_text = "SELECT fname, lname FROM employee"
        spark_result = self.spark.sql(sql_text)
        sqlglot_result = self.sqlglot.sql(sql_text)
        self.compare_spark_with_sqlglot(spark_result, sqlglot_result)

    def test_sql_with_join(self):
        # The join is written in SQL; the grouping and the distinct count are
        # then layered on top through the DataFrame API of each engine.
        sql_text = """
        SELECT
            e.employee_id
            , s.store_id    
        FROM
            employee e
            INNER JOIN
            store s
            ON
                e.store_id = s.store_id
        """

        # PySpark side, built with pyspark.sql.functions (F).
        spark_joined = self.spark.sql(sql_text)
        spark_grouped = spark_joined.groupBy(F.col("store_id"))
        spark_result = spark_grouped.agg(F.countDistinct(F.col("employee_id")))

        # sqlglot side, the same steps built with sqlglot's functions (SF).
        sqlglot_joined = self.sqlglot.sql(sql_text)
        sqlglot_grouped = sqlglot_joined.groupBy(SF.col("store_id"))
        sqlglot_result = sqlglot_grouped.agg(SF.countDistinct(SF.col("employee_id")))

        # NOTE(review): the schema comparison is skipped here; the reason is not
        # visible in this file -- confirm against DataFrameValidator.
        self.compare_spark_with_sqlglot(spark_result, sqlglot_result, skip_schema_compare=True)