From c51a9844b869fd7cd69e5cc7658d34f61a865185 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 1 Nov 2023 06:12:42 +0100 Subject: Merging upstream version 19.0.1. Signed-off-by: Daniel Baumann --- tests/dialects/test_dialect.py | 42 ++++++++++++++++++-- tests/dialects/test_hive.py | 15 ++++++++ tests/dialects/test_mysql.py | 14 +++++++ tests/dialects/test_postgres.py | 45 ++++++++++++++++++---- tests/dialects/test_redshift.py | 16 ++++++++ tests/dialects/test_snowflake.py | 5 +++ tests/dialects/test_spark.py | 5 ++- tests/dialects/test_teradata.py | 28 ++++++++------ tests/dialects/test_tsql.py | 44 +++++++++++++++++++-- tests/fixtures/optimizer/normalize.sql | 2 +- tests/fixtures/optimizer/normalize_identifiers.sql | 3 ++ tests/fixtures/optimizer/optimizer.sql | 4 +- tests/fixtures/optimizer/pushdown_predicates.sql | 8 ++-- tests/fixtures/optimizer/qualify_tables.sql | 4 ++ tests/fixtures/optimizer/quote_identities.sql | 3 ++ tests/fixtures/optimizer/simplify.sql | 20 +++++----- tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 4 +- tests/fixtures/optimizer/tpc-h/tpc-h.sql | 6 +-- tests/test_expressions.py | 2 +- tests/test_optimizer.py | 18 +++++++++ 20 files changed, 235 insertions(+), 53 deletions(-) (limited to 'tests') diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 0d43b2a..2546c98 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -839,10 +839,6 @@ class TestDialect(Validator): ) self.validate_all( "STR_TO_DATE(x, '%Y-%m-%dT%H:%M:%S')", - read={ - "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", - "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", - }, write={ "drill": "TO_DATE(x, 'yyyy-MM-dd''T''HH:mm:ss')", "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%T')", @@ -1861,3 +1857,41 @@ SELECT "tsql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", }, ) + + def test_nested_ctes(self): + self.validate_all( + "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + write={ + "bigquery": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "clickhouse": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "databricks": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "duckdb": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "hive": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", + "mysql": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "postgres": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "presto": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "redshift": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "snowflake": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "spark": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "spark2": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", + "sqlite": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "trino": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", + "tsql": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", + }, + ) + self.validate_all( + "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", + write={ + "bigquery": "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", + "duckdb": "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", + "hive": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT * FROM (SELECT c FROM t) AS subq1) AS subq2", + "tsql": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT * FROM (SELECT c FROM t) AS subq1) AS subq2", + }, + ) + self.validate_all( + "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", + write={ + "duckdb": "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", + "tsql": "WITH t1(x) AS (SELECT 1), t2(y) AS (SELECT 2) SELECT * FROM (SELECT y FROM t2) AS subq", + }, + ) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 26f0189..ba95442 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -429,6 +429,21 @@ class TestHive(Validator): self.validate_identity( "SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH ROLLUP" ) + + self.validate_all( + "SET hiveconf:some_var = 5", + write={ + "hive": "SET hiveconf:some_var = 5", + "spark": "SET hiveconf:some_var = 5", + }, + ) + self.validate_all( + "SELECT ${hiveconf:some_var}", + write={ + "hive": "SELECT ${hiveconf:some_var}", + "spark": "SELECT ${hiveconf:some_var}", + }, + ) self.validate_all( "SELECT A.1a AS b FROM test_a AS A", write={ diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index dce2b9d..3c165a3 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -564,8 +564,22 @@ class TestMySQL(Validator): "STR_TO_DATE(x, '%M')", read={"": "TS_OR_DS_TO_DATE(x, '%B')"}, ) + self.validate_all( + "STR_TO_DATE(x, '%Y-%m-%d')", + write={"presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)"}, + ) + self.validate_all( + "STR_TO_DATE(x, '%Y-%m-%dT%T')", write={"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')"} + ) def test_mysql(self): + self.validate_all( + # MySQL doesn't support FULL OUTER joins + "SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.x = t2.x UNION SELECT * FROM t1 RIGHT OUTER JOIN t2 ON t1.x = t2.x", + read={ + "postgres": "SELECT * FROM t1 FULL OUTER JOIN t2 ON t1.x = t2.x", + }, + ) self.validate_all( "a XOR b", read={ diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 3121cb0..0e5f1a1 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -9,22 +9,39 @@ class TestPostgres(Validator): dialect = "postgres" def test_ddl(self): - self.validate_identity( - "CREATE INDEX foo ON bar.baz USING btree(col1 varchar_pattern_ops ASC, col2)" - ) - self.validate_identity( - "CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])", - "CREATE TABLE test (x TIMESTAMP[][])", - ) self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)") self.validate_identity("CREATE TABLE test (elems JSONB[])") self.validate_identity("CREATE TABLE public.y (x TSTZRANGE NOT NULL)") self.validate_identity("CREATE TABLE test (foo HSTORE)") self.validate_identity("CREATE TABLE test (foo JSONB)") self.validate_identity("CREATE TABLE test (foo VARCHAR(64)[])") + self.validate_identity("CREATE TABLE test (foo INT) PARTITION BY HASH(foo)") self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING a") self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING a, b") self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING *") + self.validate_identity("UPDATE tbl_name SET foo = 123 RETURNING a") + self.validate_identity("CREATE TABLE cities_partdef PARTITION OF cities DEFAULT") + self.validate_identity( + "CREATE TABLE cust_part3 PARTITION OF customers FOR VALUES WITH (MODULUS 3, REMAINDER 2)" + ) + self.validate_identity( + "CREATE TABLE measurement_y2016m07 PARTITION OF measurement (unitsales DEFAULT 0) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')" + ) + self.validate_identity( + "CREATE TABLE measurement_ym_older PARTITION OF measurement_year_month FOR VALUES FROM (MINVALUE, MINVALUE) TO (2016, 11)" + ) + self.validate_identity( + "CREATE TABLE measurement_ym_y2016m11 PARTITION OF measurement_year_month FOR VALUES FROM (2016, 11) TO (2016, 12)" + ) + self.validate_identity( + "CREATE TABLE cities_ab PARTITION OF cities (CONSTRAINT city_id_nonzero CHECK (city_id <> 0)) FOR VALUES IN ('a', 'b')" + ) + self.validate_identity( + "CREATE TABLE cities_ab PARTITION OF cities (CONSTRAINT city_id_nonzero CHECK (city_id <> 0)) FOR VALUES IN ('a', 'b') PARTITION BY RANGE(population)" + ) + self.validate_identity( + "CREATE INDEX foo ON bar.baz USING btree(col1 varchar_pattern_ops ASC, col2)" + ) self.validate_identity( "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO NOTHING RETURNING *" ) @@ -43,7 +60,10 @@ class TestPostgres(Validator): self.validate_identity( "DELETE FROM event USING sales AS s WHERE event.eventid = s.eventid RETURNING a" ) - self.validate_identity("UPDATE tbl_name SET foo = 123 RETURNING a") + self.validate_identity( + "CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])", + "CREATE TABLE test (x TIMESTAMP[][])", + ) self.validate_all( "CREATE OR REPLACE FUNCTION function_name (input_a character varying DEFAULT NULL::character varying)", @@ -187,6 +207,15 @@ class TestPostgres(Validator): self.validate_identity( """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE RESTRICT""" ) + self.validate_identity( + "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY" + ) + self.validate_identity( + "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json" + ) + self.validate_identity( + "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json(a, b)" + ) self.validate_identity("x @@ y") self.validate_identity("CAST(x AS MONEY)") self.validate_identity("CAST(x AS INT4RANGE)") diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index c848010..3e42525 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -6,10 +6,26 @@ class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): + self.validate_identity( + "SELECT DATE_DIFF('month', CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", + "SELECT DATEDIFF(month, CAST(CAST('2020-02-29 00:00:00' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-02 00:00:00' AS TIMESTAMP) AS DATE))", + ) self.validate_identity( "SELECT * FROM x WHERE y = DATEADD('month', -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))", "SELECT * FROM x WHERE y = DATEADD(month, -1, CAST(DATE_TRUNC('month', (SELECT y FROM #temp_table)) AS DATE))", ) + + self.validate_all( + "LISTAGG(sellerid, ', ')", + read={ + "duckdb": "STRING_AGG(sellerid, ', ')", + }, + write={ + # GROUP_CONCAT and STRING_AGG are aliases in DuckDB + "duckdb": "GROUP_CONCAT(sellerid, ', ')", + "redshift": "LISTAGG(sellerid, ', ')", + }, + ) self.validate_all( "SELECT APPROXIMATE COUNT(DISTINCT y)", read={ diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 65b77ea..2cad1d2 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -13,6 +13,7 @@ class TestSnowflake(Validator): expr.selects[0].assert_is(exp.AggFunc) self.assertEqual(expr.sql(dialect="snowflake"), "SELECT APPROX_TOP_K(C4, 3, 5) FROM t") + self.validate_identity("SELECT OBJECT_CONSTRUCT()") self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())") self.validate_identity("SELECT DAYOFYEAR(CURRENT_TIMESTAMP())") self.validate_identity("LISTAGG(data['some_field'], ',')") @@ -77,6 +78,10 @@ class TestSnowflake(Validator): "SELECT {fn CEILING(5.3)}", "SELECT CEIL(5.3)", ) + self.validate_identity( + "SELECT TO_TIMESTAMP(x) FROM t", + "SELECT CAST(x AS TIMESTAMPNTZ) FROM t", + ) self.validate_all("CAST(x AS BYTEINT)", write={"snowflake": "CAST(x AS INT)"}) self.validate_all("CAST(x AS CHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"}) diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index e08915b..841a005 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -592,9 +592,10 @@ TBLPROPERTIES ( self.validate_all( "INSERT OVERWRITE TABLE table WITH cte AS (SELECT cola FROM other_table) SELECT cola FROM cte", write={ - "spark": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", + "databricks": "INSERT OVERWRITE TABLE table WITH cte AS (SELECT cola FROM other_table) SELECT cola FROM cte", + "hive": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", + "spark": "INSERT OVERWRITE TABLE table WITH cte AS (SELECT cola FROM other_table) SELECT cola FROM cte", "spark2": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", - "databricks": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", }, ) diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index b5c0fe8..14703c4 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -148,18 +148,21 @@ class TestTeradata(Validator): self.validate_all("a MOD b", write={"teradata": "a MOD b", "mysql": "a % b"}) def test_abbrev(self): - self.validate_all("a LT b", write={"teradata": "a < b"}) - self.validate_all("a LE b", write={"teradata": "a <= b"}) - self.validate_all("a GT b", write={"teradata": "a > b"}) - self.validate_all("a GE b", write={"teradata": "a >= b"}) - self.validate_all("a ^= b", write={"teradata": "a <> b"}) - self.validate_all("a NE b", write={"teradata": "a <> b"}) - self.validate_all("a NOT= b", write={"teradata": "a <> b"}) - - self.validate_all( - "SEL a FROM b", - write={"teradata": "SELECT a FROM b"}, - ) + self.validate_identity("a LT b", "a < b") + self.validate_identity("a LE b", "a <= b") + self.validate_identity("a GT b", "a > b") + self.validate_identity("a GE b", "a >= b") + self.validate_identity("a ^= b", "a <> b") + self.validate_identity("a NE b", "a <> b") + self.validate_identity("a NOT= b", "a <> b") + self.validate_identity("a EQ b", "a = b") + self.validate_identity("SEL a FROM b", "SELECT a FROM b") + self.validate_identity( + "SELECT col1, col2 FROM dbc.table1 WHERE col1 EQ 'value1' MINUS SELECT col1, col2 FROM dbc.table2", + "SELECT col1, col2 FROM dbc.table1 WHERE col1 = 'value1' EXCEPT SELECT col1, col2 FROM dbc.table2", + ) + self.validate_identity("UPD a SET b = 1", "UPDATE a SET b = 1") + self.validate_identity("DEL FROM a", "DELETE FROM a") def test_datatype(self): self.validate_all( @@ -187,3 +190,4 @@ class TestTeradata(Validator): "": "STR_TO_DATE('1992-01', '%Y-%d')", }, ) + self.validate_identity("CAST('1992-01' AS FORMAT 'YYYY-DD')") diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index 4775020..0ac94f2 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -6,6 +6,36 @@ class TestTSQL(Validator): dialect = "tsql" def test_tsql(self): + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c FROM t) AS temp", + read={ + "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t", + }, + ) + self.validate_all( + "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y", + read={ + "duckdb": "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y", + }, + ) + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT 1 AS c UNION (SELECT c FROM t)", + read={ + "duckdb": "SELECT 1 AS c UNION (WITH t(c) AS (SELECT 1) SELECT c FROM t)", + }, + ) + self.validate_all( + "WITH t(c) AS (SELECT 1) MERGE INTO x AS z USING (SELECT c FROM t) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b", + read={ + "postgres": "MERGE INTO x AS z USING (WITH t(c) AS (SELECT 1) SELECT c FROM t) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b", + }, + ) + self.validate_all( + "WITH t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT * FROM (SELECT SUM(n) AS s4 FROM t) AS subq", + read={ + "duckdb": "SELECT * FROM (WITH RECURSIVE t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) AS s4 FROM t) AS subq", + }, + ) self.validate_all( "CREATE TABLE #mytemptable (a INTEGER)", read={ @@ -825,12 +855,18 @@ WHERE def test_datename(self): self.validate_all( - "SELECT DATENAME(mm,'1970-01-01')", - write={"spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'MMMM')"}, + "SELECT DATENAME(mm, '1970-01-01')", + write={ + "spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'MMMM')", + "tsql": "SELECT FORMAT(CAST('1970-01-01' AS DATETIME2), 'MMMM')", + }, ) self.validate_all( - "SELECT DATENAME(dw,'1970-01-01')", - write={"spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'EEEE')"}, + "SELECT DATENAME(dw, '1970-01-01')", + write={ + "spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'EEEE')", + "tsql": "SELECT FORMAT(CAST('1970-01-01' AS DATETIME2), 'dddd')", + }, ) def test_datepart(self): diff --git a/tests/fixtures/optimizer/normalize.sql b/tests/fixtures/optimizer/normalize.sql index 12bc388..8e94c19 100644 --- a/tests/fixtures/optimizer/normalize.sql +++ b/tests/fixtures/optimizer/normalize.sql @@ -44,4 +44,4 @@ SELECT * FROM x WHERE (A AND B) OR C; SELECT * FROM x WHERE (A OR C) AND (B OR C); dt2 between '2022-01-01 12:00:00' and '2022-12-31' and dt2 >= '2022-05-01 12:00:00' or dt2 = '2021-06-01 12:00:00'; -(dt2 <= '2022-12-31' OR dt2 = '2021-06-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-01-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-05-01 12:00:00') +(dt2 <= '2022-12-31' OR dt2 = '2021-06-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-01-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-05-01 12:00:00'); diff --git a/tests/fixtures/optimizer/normalize_identifiers.sql b/tests/fixtures/optimizer/normalize_identifiers.sql index 4cb7dd1..7ddcc6d 100644 --- a/tests/fixtures/optimizer/normalize_identifiers.sql +++ b/tests/fixtures/optimizer/normalize_identifiers.sql @@ -70,3 +70,6 @@ SELECT a /* sqlglot.meta case_sensitive */, B FROM table /* sqlglot.meta case_se # dialect: redshift SELECT COALESCE(json_val.a /* sqlglot.meta case_sensitive */, json_val.A /* sqlglot.meta case_sensitive */) FROM table; SELECT COALESCE(json_val.a /* sqlglot.meta case_sensitive */, json_val.A /* sqlglot.meta case_sensitive */) FROM table; + +SELECT @X; +SELECT @X; diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index a812bc5..a9d6584 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -480,8 +480,8 @@ JOIN "company_table" AS "company_table_2" LEFT JOIN "unlocked" AS "unlocked" ON "company_table_2"."id" = "unlocked"."company_id" WHERE - CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE - AND NOT "company_table_2"."id" IS NULL; + NOT "company_table_2"."id" IS NULL + AND CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE; # title: db.table alias clash # execute: false diff --git a/tests/fixtures/optimizer/pushdown_predicates.sql b/tests/fixtures/optimizer/pushdown_predicates.sql index cfa69fb..9912b89 100644 --- a/tests/fixtures/optimizer/pushdown_predicates.sql +++ b/tests/fixtures/optimizer/pushdown_predicates.sql @@ -11,7 +11,7 @@ SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a WHERE TRUE; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.b; -SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a OR x.a = y.b WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.b; +SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a OR x.a = y.b WHERE (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x) AS x WHERE x.c = 1; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x WHERE x.b * 1 = 1) AS x WHERE TRUE; @@ -26,10 +26,10 @@ SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y) AS y ON y.a = 1 AND x.a = y SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE; SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y) AS y ON y.a = 1 WHERE x.a = 1 AND x.b = 1 AND y.a = x.a; -SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON y.a = x.a AND TRUE WHERE x.a = 1 AND x.b = 1 AND TRUE; +SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON TRUE AND y.a = x.a WHERE x.a = 1 AND x.b = 1 AND TRUE; SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT * FROM y AS y) AS y WHERE x.a = 1 AND x.b = 1 AND y.a = x.a AND y.a = 1; -SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON y.a = x.a AND TRUE WHERE x.a = 1 AND x.b = 1 AND TRUE AND TRUE; +SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON TRUE AND y.a = x.a WHERE x.a = 1 AND x.b = 1 AND TRUE AND TRUE; with t1 as (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1; WITH t1 AS (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1; @@ -43,4 +43,4 @@ SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt FROM x AS x HAVING COUNT(1) > 0 -- Pushdown predicate to HAVING (DNF) SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt, COUNT(x.a) AS cnt_a, COUNT(x.b) AS cnt_b FROM x AS x) AS x WHERE (x.cnt_a > 0 AND x.cnt_b > 0) OR x.cnt > 0; -SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt, COUNT(x.a) AS cnt_a, COUNT(x.b) AS cnt_b FROM x AS x HAVING COUNT(1) > 0 OR (COUNT(x.a) > 0 AND COUNT(x.b) > 0)) AS x WHERE x.cnt > 0 OR (x.cnt_a > 0 AND x.cnt_b > 0); \ No newline at end of file +SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt, COUNT(x.a) AS cnt_a, COUNT(x.b) AS cnt_b FROM x AS x HAVING COUNT(1) > 0 OR (COUNT(x.a) > 0 AND COUNT(x.b) > 0)) AS x WHERE x.cnt > 0 OR (x.cnt_a > 0 AND x.cnt_b > 0); diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 3717cd4..248d7e9 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -117,3 +117,7 @@ SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS x; # title: lateral unnest without alias SELECT x FROM t, LATERAL UNNEST(t.xs); SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS _q_0; + +# title: table with ordinality +SELECT * FROM t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; +SELECT * FROM c.db.t AS t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; diff --git a/tests/fixtures/optimizer/quote_identities.sql b/tests/fixtures/optimizer/quote_identities.sql index 407b7f6..d6cfbf8 100644 --- a/tests/fixtures/optimizer/quote_identities.sql +++ b/tests/fixtures/optimizer/quote_identities.sql @@ -6,3 +6,6 @@ SELECT "a" FROM "x"; SELECT x.a AS a FROM db.x; SELECT "x"."a" AS "a" FROM "db"."x"; + +SELECT @x; +SELECT @x; diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql index c53a972..f50f688 100644 --- a/tests/fixtures/optimizer/simplify.sql +++ b/tests/fixtures/optimizer/simplify.sql @@ -631,19 +631,19 @@ COALESCE(x); x; COALESCE(x, 1) = 2; -x = 2 AND NOT x IS NULL; +NOT x IS NULL AND x = 2; 2 = COALESCE(x, 1); 2 = x AND NOT x IS NULL; COALESCE(x, 1, 1) = 1 + 1; -x = 2 AND NOT x IS NULL; +NOT x IS NULL AND x = 2; COALESCE(x, 1, 2) = 2; -x = 2 AND NOT x IS NULL; +NOT x IS NULL AND x = 2; COALESCE(x, 3) <= 2; -x <= 2 AND NOT x IS NULL; +NOT x IS NULL AND x <= 2; COALESCE(x, 1) <> 2; x <> 2 OR x IS NULL; @@ -872,7 +872,7 @@ x = 5 AND y = x; x = 5 AND y = 5; 5 = x AND y = x; -y = 5 AND 5 = x; +5 = x AND y = 5; x = 5 OR y = x; x = 5 OR y = x; @@ -899,13 +899,13 @@ x = 5 AND x + 3 = 8; x = 5; x = 5 AND (SELECT x FROM t WHERE y = 1); -x = 5 AND (SELECT x FROM t WHERE y = 1); +(SELECT x FROM t WHERE y = 1) AND x = 5; x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1); -x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1); +(SELECT z = 5 FROM t WHERE y = 1) AND x = 1 AND y > 0; x = 1 AND x = y AND (SELECT z FROM t WHERE a AND (b OR c)); -x = 1 AND (SELECT z FROM t WHERE a AND (b OR c)) AND 1 = y; +(SELECT z FROM t WHERE a AND (b OR c)) AND 1 = y AND x = 1; t1.a = 39 AND t2.b = t1.a AND t3.c = t2.b; t1.a = 39 AND t2.b = 39 AND t3.c = 39; @@ -920,10 +920,10 @@ x = 1 AND CASE x WHEN 5 THEN FALSE ELSE TRUE END; x = 1; x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; -x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; +CASE WHEN x = 5 THEN FALSE ELSE TRUE END AND x = y; x = 1 AND CASE WHEN y = 5 THEN x = z END; -x = 1 AND CASE WHEN y = 5 THEN 1 = z END; +CASE WHEN y = 5 THEN 1 = z END AND x = 1; -------------------------------------- -- Simplify Conditionals diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 52ee12c..62f1d79 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -2547,8 +2547,8 @@ JOIN "date_dim" AS "date_dim" ) WHERE "_u_3"."_u_4" IS NULL - AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "cs1"."cs_warehouse_sk" <> "_x") AND NOT "_u_0"."_u_1" IS NULL + AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "cs1"."cs_warehouse_sk" <> "_x") ORDER BY COUNT(DISTINCT "cs1"."cs_order_number") LIMIT 100; @@ -12586,8 +12586,8 @@ JOIN "web_site" AS "web_site" AND "ws1"."ws_web_site_sk" = "web_site"."web_site_sk" WHERE "_u_3"."_u_4" IS NULL - AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "ws1"."ws_warehouse_sk" <> "_x") AND NOT "_u_0"."_u_1" IS NULL + AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "ws1"."ws_warehouse_sk" <> "_x") ORDER BY COUNT(DISTINCT "ws1"."ws_order_number") LIMIT 100; diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index 942295e..14d8b53 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -249,9 +249,9 @@ FROM "orders" AS "orders" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "orders"."o_orderkey" WHERE - CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE) + NOT "_u_0"."l_orderkey" IS NULL + AND CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-07-01' AS DATE) - AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "orders"."o_orderpriority" ORDER BY @@ -1348,8 +1348,8 @@ WHERE "_u_2"."l_orderkey" IS NULL OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "l1"."l_suppkey") ) - AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "l1"."l_suppkey") AND NOT "_u_0"."l_orderkey" IS NULL + AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "l1"."l_suppkey") GROUP BY "supplier"."s_name" ORDER BY diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 6c48943..1fbe2d7 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -443,7 +443,7 @@ class TestExpressions(unittest.TestCase): return None return node - self.assertEqual(expression.transform(remove_non_list_arg).sql(), "CAST(x AS )") + self.assertEqual(expression.transform(remove_non_list_arg).sql(), "CAST(x AS)") expression = parse_one("SELECT a, b FROM x") diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 8f5dd08..fd95577 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -246,6 +246,15 @@ class TestOptimizer(unittest.TestCase): "CREATE FUNCTION `udfs`.`myTest`(`x` FLOAT64) AS (1)", ) + self.assertEqual( + optimizer.qualify.qualify( + parse_one("SELECT `bar_bazfoo_$id` FROM test", read="spark"), + schema={"test": {"bar_bazFoo_$id": "BIGINT"}}, + dialect="spark", + ).sql(dialect="spark"), + "SELECT `test`.`bar_bazfoo_$id` AS `bar_bazfoo_$id` FROM `test` AS `test`", + ) + self.check_file( "qualify_columns", qualify_columns, execute=True, schema=self.schema, set_dialect=True ) @@ -271,6 +280,8 @@ class TestOptimizer(unittest.TestCase): set_dialect=True, ) + self.assertEqual(optimizer.normalize_identifiers.normalize_identifiers("a%").sql(), '"a%"') + def test_pushdown_projection(self): self.check_file("pushdown_projections", pushdown_projections, schema=self.schema) @@ -978,3 +989,10 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') query = parse_one("select a.b:c from d", read="snowflake") qualified = optimizer.qualify.qualify(query) self.assertEqual(qualified.expressions[0].alias, "c") + + def test_qualify_tables_no_schema(self): + query = parse_one("select a from b") + self.assertEqual( + optimizer.qualify_tables.qualify_tables(query, catalog="catalog").sql(), + "SELECT a FROM b AS b", + ) -- cgit v1.2.3