diff options
Diffstat (limited to 'tests/dialects/test_dialect.py')
-rw-r--r-- | tests/dialects/test_dialect.py | 141 |
1 files changed, 136 insertions, 5 deletions
diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 190d044..f0faccb 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -20,7 +20,9 @@ class Validator(unittest.TestCase): def parse_one(self, sql, **kwargs): return parse_one(sql, read=self.dialect, **kwargs) - def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False): + def validate_identity( + self, sql, write_sql=None, pretty=False, check_command_warning=False, identify=False + ): if check_command_warning: with self.assertLogs(parser_logger) as cm: expression = self.parse_one(sql) @@ -28,7 +30,9 @@ class Validator(unittest.TestCase): else: expression = self.parse_one(sql) - self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty)) + self.assertEqual( + write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty, identify=identify) + ) return expression def validate_all(self, sql, read=None, write=None, pretty=False, identify=False): @@ -1408,6 +1412,13 @@ class TestDialect(Validator): }, ) + for dialect in ("duckdb", "starrocks"): + with self.subTest(f"Generating json extraction with digit-prefixed key ({dialect})"): + self.assertEqual( + parse_one("""select '{"0": "v"}' -> '0'""", read=dialect).sql(dialect=dialect), + """SELECT '{"0": "v"}' -> '0'""", + ) + def test_cross_join(self): self.validate_all( "SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)", @@ -1422,7 +1433,7 @@ class TestDialect(Validator): write={ "drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)", "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)", - "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b", + "spark": "SELECT a, b FROM x LATERAL VIEW INLINE(ARRAYS_ZIP(y, z)) t AS a, b", }, ) self.validate_all( @@ -1488,12 +1499,14 @@ class TestDialect(Validator): "SELECT * FROM a INTERSECT SELECT * FROM b", read={ "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", }, write={ "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", @@ -1503,12 +1516,14 @@ class TestDialect(Validator): "SELECT * FROM a EXCEPT SELECT * FROM b", read={ "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", }, write={ "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", @@ -1527,6 +1542,7 @@ class TestDialect(Validator): "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", write={ "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", @@ -1536,6 +1552,7 @@ class TestDialect(Validator): "SELECT * FROM a INTERSECT ALL SELECT * FROM b", write={ "bigquery": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + "clickhouse": "SELECT * FROM a INTERSECT SELECT * FROM b", "duckdb": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", "presto": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", "spark": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", @@ -1545,6 +1562,7 @@ class TestDialect(Validator): "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", write={ "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", @@ -1554,6 +1572,7 @@ class TestDialect(Validator): "SELECT * FROM a EXCEPT ALL SELECT * FROM b", read={ "bigquery": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + "clickhouse": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", "duckdb": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", "presto": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", "spark": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", @@ -2354,7 +2373,7 @@ SELECT "mysql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", "oracle": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1", "postgres": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", - "tsql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", + "tsql": "SELECT * FROM (SELECT *, COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", }, ) self.validate_all( @@ -2366,7 +2385,7 @@ SELECT "mysql": "SELECT `user id`, some_id, other_id, `2 nd id` FROM (SELECT `user id`, some_id, 1 AS other_id, 2 AS `2 nd id`, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", "oracle": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1', "postgres": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1', - "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", + "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", }, ) @@ -2722,3 +2741,115 @@ FROM subquery2""", "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates", }, ) + + def test_set_operation_specifiers(self): + self.validate_all( + "SELECT 1 EXCEPT ALL SELECT 1", + write={ + "": "SELECT 1 EXCEPT ALL SELECT 1", + "bigquery": UnsupportedError, + "clickhouse": "SELECT 1 EXCEPT SELECT 1", + "databricks": "SELECT 1 EXCEPT ALL SELECT 1", + "duckdb": "SELECT 1 EXCEPT ALL SELECT 1", + "mysql": "SELECT 1 EXCEPT ALL SELECT 1", + "oracle": "SELECT 1 EXCEPT ALL SELECT 1", + "postgres": "SELECT 1 EXCEPT ALL SELECT 1", + "presto": UnsupportedError, + "redshift": UnsupportedError, + "snowflake": UnsupportedError, + "spark": "SELECT 1 EXCEPT ALL SELECT 1", + "sqlite": UnsupportedError, + "starrocks": UnsupportedError, + "trino": UnsupportedError, + "tsql": UnsupportedError, + }, + ) + + def test_normalize(self): + for form in ("", ", nfkc"): + with self.subTest(f"Testing NORMALIZE('str'{form}) roundtrip"): + self.validate_all( + f"SELECT NORMALIZE('str'{form})", + read={ + "presto": f"SELECT NORMALIZE('str'{form})", + "trino": f"SELECT NORMALIZE('str'{form})", + "bigquery": f"SELECT NORMALIZE('str'{form})", + }, + write={ + "presto": f"SELECT NORMALIZE('str'{form})", + "trino": f"SELECT NORMALIZE('str'{form})", + "bigquery": f"SELECT NORMALIZE('str'{form})", + }, + ) + + self.assertIsInstance(parse_one("NORMALIZE('str', NFD)").args.get("form"), exp.Var) + + def test_coalesce(self): + """ + Validate that "expressions" is a list for all the exp.Coalesce instances; This is important + as some optimizer rules are coalesce specific and will iterate on "expressions" + """ + + # Check the 2-arg aliases + for func in ("COALESCE", "IFNULL", "NVL"): + self.assertIsInstance(self.parse_one(f"{func}(1, 2)").expressions, list) + + # Check the varlen case + coalesce = self.parse_one("COALESCE(x, y, z)") + self.assertIsInstance(coalesce.expressions, list) + self.assertIsNone(coalesce.args.get("is_nvl")) + + # Check Oracle's NVL which is decoupled from COALESCE + oracle_nvl = parse_one("NVL(x, y)", read="oracle") + self.assertIsInstance(oracle_nvl.expressions, list) + self.assertTrue(oracle_nvl.args.get("is_nvl")) + + # Check T-SQL's ISNULL which is parsed into exp.Coalesce + self.assertIsInstance(parse_one("ISNULL(x, y)", read="tsql").expressions, list) + + def test_trim(self): + self.validate_all( + "TRIM('abc', 'a')", + read={ + "bigquery": "TRIM('abc', 'a')", + "snowflake": "TRIM('abc', 'a')", + }, + write={ + "bigquery": "TRIM('abc', 'a')", + "snowflake": "TRIM('abc', 'a')", + }, + ) + + self.validate_all( + "LTRIM('Hello World', 'H')", + read={ + "oracle": "LTRIM('Hello World', 'H')", + "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')", + "snowflake": "LTRIM('Hello World', 'H')", + "bigquery": "LTRIM('Hello World', 'H')", + "": "LTRIM('Hello World', 'H')", + }, + write={ + "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')", + "oracle": "LTRIM('Hello World', 'H')", + "snowflake": "LTRIM('Hello World', 'H')", + "bigquery": "LTRIM('Hello World', 'H')", + }, + ) + + self.validate_all( + "RTRIM('Hello World', 'd')", + read={ + "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')", + "oracle": "RTRIM('Hello World', 'd')", + "snowflake": "RTRIM('Hello World', 'd')", + "bigquery": "RTRIM('Hello World', 'd')", + "": "RTRIM('Hello World', 'd')", + }, + write={ + "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')", + "oracle": "RTRIM('Hello World', 'd')", + "snowflake": "RTRIM('Hello World', 'd')", + "bigquery": "RTRIM('Hello World', 'd')", + }, + ) |