From 5a674d94c3ab243e2dd6a00f9edf6cc50b018512 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 30 Sep 2022 07:07:28 +0200 Subject: Merging upstream version 6.2.6. Signed-off-by: Daniel Baumann --- tests/dialects/test_bigquery.py | 21 ++ tests/dialects/test_dialect.py | 17 +- tests/dialects/test_duckdb.py | 8 + tests/dialects/test_hive.py | 10 +- tests/dialects/test_mysql.py | 36 +++ tests/dialects/test_presto.py | 27 +- tests/dialects/test_snowflake.py | 73 ++++++ tests/dialects/test_spark.py | 14 +- tests/dialects/test_tsql.py | 18 ++ tests/fixtures/identity.sql | 24 +- tests/fixtures/optimizer/eliminate_subqueries.sql | 121 ++++++--- tests/fixtures/optimizer/merge_derived_tables.sql | 63 ----- tests/fixtures/optimizer/merge_subqueries.sql | 99 +++++++ tests/fixtures/optimizer/optimizer.sql | 34 ++- tests/fixtures/optimizer/qualify_columns.sql | 10 + .../optimizer/qualify_columns__invalid.sql | 1 - tests/fixtures/optimizer/tpc-h/tpc-h.sql | 290 +++++++++++---------- tests/fixtures/pretty.sql | 19 -- tests/test_build.py | 14 +- tests/test_expressions.py | 33 ++- tests/test_optimizer.py | 125 ++++++--- tests/test_parser.py | 5 + tests/test_transforms.py | 25 +- 23 files changed, 737 insertions(+), 350 deletions(-) delete mode 100644 tests/fixtures/optimizer/merge_derived_tables.sql create mode 100644 tests/fixtures/optimizer/merge_subqueries.sql (limited to 'tests') diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 1337c3d..c929e59 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -236,3 +236,24 @@ class TestBigQuery(Validator): "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10", }, ) + self.validate_all( + "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", + write={ + "spark": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", + "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", + }, + ) + self.validate_all( + "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))", + write={ + "bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))", + }, + ) + + def test_user_defined_functions(self): + self.validate_identity( + "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 LANGUAGE js AS 'return x*y;'" + ) + self.validate_identity("CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y)") + self.validate_identity("CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE AS SELECT s, t") diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 4e0a3c6..e0ec824 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -13,9 +13,6 @@ from sqlglot import ( class Validator(unittest.TestCase): dialect = None - def validate(self, sql, target, **kwargs): - self.assertEqual(transpile(sql, **kwargs)[0], target) - def validate_identity(self, sql): self.assertEqual(transpile(sql, read=self.dialect, write=self.dialect)[0], sql) @@ -258,6 +255,7 @@ class TestDialect(Validator): "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%M-%d'))", "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')", "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%i-%d'))", + "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%i-%d')", }, ) self.validate_all( @@ -266,6 +264,7 @@ class TestDialect(Validator): "duckdb": "CAST('2020-01-01' AS DATE)", "hive": "TO_DATE('2020-01-01')", "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')", + "starrocks": "TO_DATE('2020-01-01')", }, ) self.validate_all( @@ -341,6 +340,7 @@ class TestDialect(Validator): "duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), y)", "hive": "FROM_UNIXTIME(x, y)", "presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)", + "starrocks": "FROM_UNIXTIME(x, y)", }, ) self.validate_all( @@ -349,6 +349,7 @@ class TestDialect(Validator): "duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))", "hive": "FROM_UNIXTIME(x)", "presto": "FROM_UNIXTIME(x)", + "starrocks": "FROM_UNIXTIME(x)", }, ) self.validate_all( @@ -840,10 +841,20 @@ class TestDialect(Validator): "starrocks": UnsupportedError, }, ) + self.validate_all( + "POSITION(' ' in x)", + write={ + "duckdb": "STRPOS(x, ' ')", + "postgres": "STRPOS(x, ' ')", + "presto": "STRPOS(x, ' ')", + "spark": "LOCATE(' ', x)", + }, + ) self.validate_all( "STR_POSITION(x, 'a')", write={ "duckdb": "STRPOS(x, 'a')", + "postgres": "STRPOS(x, 'a')", "presto": "STRPOS(x, 'a')", "spark": "LOCATE('a', x)", }, diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index f52decb..96e51df 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -1,3 +1,4 @@ +from sqlglot import ErrorLevel, UnsupportedError, transpile from tests.dialects.test_dialect import Validator @@ -250,3 +251,10 @@ class TestDuckDB(Validator): "spark": "MONTH('2021-03-01')", }, ) + + with self.assertRaises(UnsupportedError): + transpile( + "SELECT a FROM b PIVOT(SUM(x) FOR y IN ('z', 'q'))", + read="duckdb", + unsupported_level=ErrorLevel.IMMEDIATE, + ) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index a9b5168..d335921 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -127,17 +127,17 @@ class TestHive(Validator): def test_ddl(self): self.validate_all( - "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", + "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1", write={ - "presto": "CREATE TABLE test WITH (FORMAT = 'parquet', x = '1', Z = '2') AS SELECT 1", - "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", - "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", + "presto": "CREATE TABLE test WITH (FORMAT='parquet', x='1', Z='2') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1", + "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1", }, ) self.validate_all( "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", write={ - "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", }, diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 87a3d64..02dc1ad 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -119,3 +119,39 @@ class TestMySQL(Validator): "sqlite": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC, '')", }, ) + self.validate_identity( + "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" + ) + self.validate_identity( + "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" + ) + self.validate_identity( + "CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" + ) + + self.validate_all( + """ + CREATE TABLE `t_customer_account` ( + "id" int(11) NOT NULL AUTO_INCREMENT, + "customer_id" int(11) DEFAULT NULL COMMENT '客户id', + "bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', + "account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', + PRIMARY KEY ("id") + ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表' + """, + write={ + "mysql": """CREATE TABLE `t_customer_account` ( + 'id' INT(11) NOT NULL AUTO_INCREMENT, + 'customer_id' INT(11) DEFAULT NULL COMMENT '客户id', + 'bank' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', + 'account_no' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', + PRIMARY KEY('id') +) +ENGINE=InnoDB +AUTO_INCREMENT=1 +DEFAULT CHARACTER SET=utf8 +COLLATE=utf8_bin +COMMENT='客户账户表'""" + }, + pretty=True, + ) diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 96c299d..b0d9ad9 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -171,7 +171,7 @@ class TestPresto(Validator): self.validate_all( "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", write={ - "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", + "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET AS SELECT 1", }, @@ -179,15 +179,15 @@ class TestPresto(Validator): self.validate_all( "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", write={ - "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", - "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1", - "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1", + "presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", + "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", }, ) self.validate_all( - "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", write={ - "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", }, @@ -195,9 +195,9 @@ class TestPresto(Validator): self.validate_all( "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", write={ - "presto": "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", - "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y", - "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y", + "presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y", + "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", + "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", }, ) self.validate_all( @@ -217,11 +217,12 @@ class TestPresto(Validator): }, ) - self.validate( + self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", - "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", - read="presto", - write="presto", + write={ + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + }, ) def test_quotes(self): diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 165f8e2..b7e39a7 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -143,6 +143,31 @@ class TestSnowflake(Validator): "snowflake": r"SELECT 'a \' \\ \\t \\x21 z $ '", }, ) + self.validate_identity("SELECT REGEXP_LIKE(a, b, c)") + self.validate_all( + "SELECT RLIKE(a, b)", + write={ + "snowflake": "SELECT REGEXP_LIKE(a, b)", + }, + ) + self.validate_all( + "SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)", + write={ + "snowflake": "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)", + }, + ) + self.validate_all( + "SELECT a FROM test pivot", + write={ + "snowflake": "SELECT a FROM test AS pivot", + }, + ) + self.validate_all( + "SELECT a FROM test unpivot", + write={ + "snowflake": "SELECT a FROM test AS unpivot", + }, + ) def test_null_treatment(self): self.validate_all( @@ -220,3 +245,51 @@ class TestSnowflake(Validator): "snowflake": "SELECT EXTRACT(month FROM CAST(a AS DATETIME))", }, ) + + def test_semi_structured_types(self): + self.validate_identity("SELECT CAST(a AS VARIANT)") + self.validate_all( + "SELECT a::VARIANT", + write={ + "snowflake": "SELECT CAST(a AS VARIANT)", + "tsql": "SELECT CAST(a AS SQL_VARIANT)", + }, + ) + self.validate_identity("SELECT CAST(a AS ARRAY)") + self.validate_all( + "ARRAY_CONSTRUCT(0, 1, 2)", + write={ + "snowflake": "[0, 1, 2]", + "bigquery": "[0, 1, 2]", + "duckdb": "LIST_VALUE(0, 1, 2)", + "presto": "ARRAY[0, 1, 2]", + "spark": "ARRAY(0, 1, 2)", + }, + ) + self.validate_all( + "SELECT a::OBJECT", + write={ + "snowflake": "SELECT CAST(a AS OBJECT)", + }, + ) + + def test_ddl(self): + self.validate_identity( + "CREATE TABLE a (x DATE, y BIGINT) WITH (PARTITION BY (x), integration='q', auto_refresh=TRUE, file_format=(type = parquet))" + ) + self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x") + + def test_user_defined_functions(self): + self.validate_all( + "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$", + write={ + "snowflake": "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS ' SELECT 1 '", + }, + ) + self.validate_all( + "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'", + write={ + "snowflake": "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'", + "bigquery": "CREATE TABLE FUNCTION a() RETURNS TABLE AS SELECT 1", + }, + ) diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 22f6947..8377e47 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -34,7 +34,7 @@ class TestSpark(Validator): self.validate_all( "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", write={ - "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY = ARRAY['MONTHS'])", + "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])", "hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", }, @@ -42,7 +42,7 @@ class TestSpark(Validator): self.validate_all( "CREATE TABLE test STORED AS PARQUET AS SELECT 1", write={ - "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", + "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET AS SELECT 1", }, @@ -56,9 +56,9 @@ class TestSpark(Validator): ) COMMENT='Test comment: blah' WITH ( - PARTITIONED_BY = ARRAY['date'], - FORMAT = 'ICEBERG', - x = '1' + PARTITIONED_BY=ARRAY['date'], + FORMAT='ICEBERG', + x='1' )""", "hive": """CREATE TABLE blah ( col_a INT @@ -69,7 +69,7 @@ PARTITIONED BY ( ) STORED AS ICEBERG TBLPROPERTIES ( - 'x' = '1' + 'x'='1' )""", "spark": """CREATE TABLE blah ( col_a INT @@ -80,7 +80,7 @@ PARTITIONED BY ( ) USING ICEBERG TBLPROPERTIES ( - 'x' = '1' + 'x'='1' )""", }, pretty=True, diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index 0619eaa..6b0b39b 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -15,6 +15,14 @@ class TestTSQL(Validator): }, ) + self.validate_all( + "CONVERT(INT, CONVERT(NUMERIC, '444.75'))", + write={ + "mysql": "CAST(CAST('444.75' AS DECIMAL) AS INT)", + "tsql": "CAST(CAST('444.75' AS NUMERIC) AS INTEGER)", + }, + ) + def test_types(self): self.validate_identity("CAST(x AS XML)") self.validate_identity("CAST(x AS UNIQUEIDENTIFIER)") @@ -24,3 +32,13 @@ class TestTSQL(Validator): self.validate_identity("CAST(x AS IMAGE)") self.validate_identity("CAST(x AS SQL_VARIANT)") self.validate_identity("CAST(x AS BIT)") + self.validate_all( + "CAST(x AS DATETIME2)", + read={ + "": "CAST(x AS DATETIME)", + }, + write={ + "mysql": "CAST(x AS DATETIME)", + "tsql": "CAST(x AS DATETIME2)", + }, + ) diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql index 1b4168c..2654be1 100644 --- a/tests/fixtures/identity.sql +++ b/tests/fixtures/identity.sql @@ -8,6 +8,7 @@ SUM(CASE WHEN x > 1 THEN 1 ELSE 0 END) / y 1.1E10 1.12e-10 -11.023E7 * 3 +0.2 (1 * 2) / (3 - 5) ((TRUE)) '' @@ -167,7 +168,7 @@ SELECT LEAD(a) OVER (ORDER BY b) AS a SELECT LEAD(a, 1) OVER (PARTITION BY a ORDER BY a) AS x SELECT LEAD(a, 1, b) OVER (PARTITION BY a ORDER BY a) AS x SELECT X((a, b) -> a + b, z -> z) AS x -SELECT X(a -> "a" + ("z" - 1)) +SELECT X(a -> a + ("z" - 1)) SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0) SELECT test.* FROM test SELECT a AS b FROM test @@ -258,15 +259,24 @@ SELECT a FROM test TABLESAMPLE(100) SELECT a FROM test TABLESAMPLE(100 ROWS) SELECT a FROM test TABLESAMPLE BERNOULLI (50) SELECT a FROM test TABLESAMPLE SYSTEM (75) +SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) +SELECT a FROM test PIVOT(SOMEAGG(x, y, z) FOR q IN (1)) +SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) PIVOT(MAX(b) FOR c IN ('d')) +SELECT a FROM (SELECT a, b FROM test) PIVOT(SUM(x) FOR y IN ('z', 'q')) +SELECT a FROM test UNPIVOT(x FOR y IN (z, q)) AS x +SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE(0.1) +SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) UNPIVOT(x FOR y IN (z, q)) AS x SELECT ABS(a) FROM test SELECT AVG(a) FROM test SELECT CEIL(a) FROM test +SELECT CEIL(a, b) FROM test SELECT COUNT(a) FROM test SELECT COUNT(1) FROM test SELECT COUNT(*) FROM test SELECT COUNT(DISTINCT a) FROM test SELECT EXP(a) FROM test SELECT FLOOR(a) FROM test +SELECT FLOOR(a, b) FROM test SELECT FIRST(a) FROM test SELECT GREATEST(a, b, c) FROM test SELECT LAST(a) FROM test @@ -299,6 +309,7 @@ SELECT CAST(a AS MAP) FROM test SELECT CAST(a AS TIMESTAMP) FROM test SELECT CAST(a AS DATE) FROM test SELECT CAST(a AS ARRAY) FROM test +SELECT CAST(a AS VARIANT) FROM test SELECT TRY_CAST(a AS INT) FROM test SELECT COALESCE(a, b, c) FROM test SELECT IFNULL(a, b) FROM test @@ -442,13 +453,10 @@ CREATE TABLE z (a INT(11) DEFAULT NULL COMMENT '客户id') CREATE TABLE z (a INT(11) NOT NULL DEFAULT 1) CREATE TABLE z (a INT(11) NOT NULL COLLATE utf8_bin AUTO_INCREMENT) CREATE TABLE z (a INT, PRIMARY KEY(a)) -CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x' -CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x' -CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x' CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1 -CREATE TABLE z WITH (FORMAT='ORC', x = '2') AS SELECT 1 +CREATE TABLE z WITH (FORMAT='ORC', x='2') AS SELECT 1 CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='parquet') AS SELECT 1 -CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x = '2') AS SELECT 1 +CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x='2') AS SELECT 1 CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT, y INT)) CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT)) AS SELECT 1 CREATE TABLE z AS (WITH cte AS (SELECT 1) SELECT * FROM cte) @@ -460,6 +468,9 @@ CREATE TEMPORARY FUNCTION f CREATE TEMPORARY FUNCTION f AS 'g' CREATE FUNCTION f CREATE FUNCTION f AS 'g' +CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1' +CREATE FUNCTION a() LANGUAGE sql +CREATE FUNCTION a() LANGUAGE sql RETURNS INT CREATE INDEX abc ON t (a) CREATE INDEX abc ON t (a, b, b) CREATE UNIQUE INDEX abc ON t (a, b, b) @@ -519,3 +530,4 @@ WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z SELECT ((SELECT 1) + 1) +SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES diff --git a/tests/fixtures/optimizer/eliminate_subqueries.sql b/tests/fixtures/optimizer/eliminate_subqueries.sql index aae5f2a..f395c0a 100644 --- a/tests/fixtures/optimizer/eliminate_subqueries.sql +++ b/tests/fixtures/optimizer/eliminate_subqueries.sql @@ -1,42 +1,79 @@ -SELECT 1 AS x, 2 AS y -UNION ALL -SELECT 1 AS x, 2 AS y; -WITH _e_0 AS ( - SELECT - 1 AS x, - 2 AS y -) -SELECT - * -FROM _e_0 -UNION ALL -SELECT - * -FROM _e_0; - -SELECT x.id -FROM ( - SELECT * - FROM x AS x - JOIN y AS y - ON x.id = y.id -) AS x -JOIN ( - SELECT * - FROM x AS x - JOIN y AS y - ON x.id = y.id -) AS y -ON x.id = y.id; -WITH _e_0 AS ( - SELECT - * - FROM x AS x - JOIN y AS y - ON x.id = y.id -) -SELECT - x.id -FROM "_e_0" AS x -JOIN "_e_0" AS y - ON x.id = y.id; +-- No derived tables +SELECT * FROM x; +SELECT * FROM x; + +-- Unaliased derived tables +SELECT a FROM (SELECT b FROM (SELECT c FROM x)); +WITH cte AS (SELECT c FROM x), cte_2 AS (SELECT b FROM cte AS cte) SELECT a FROM cte_2 AS cte_2; + +-- Joined derived table inside nested derived table +SELECT b FROM (SELECT b FROM (SELECT b FROM x JOIN (SELECT b FROM y) AS y ON x.b = y.b)); +WITH y_2 AS (SELECT b FROM y), cte AS (SELECT b FROM x JOIN y_2 AS y ON x.b = y.b), cte_2 AS (SELECT b FROM cte AS cte) SELECT b FROM cte_2 AS cte_2; + +-- Aliased derived tables +SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS z; +WITH y AS (SELECT c FROM x), z AS (SELECT b FROM y AS y) SELECT a FROM z AS z; + +-- Existing CTEs +WITH q AS (SELECT c FROM x) SELECT a FROM (SELECT b FROM q AS y) AS z; +WITH q AS (SELECT c FROM x), z AS (SELECT b FROM q AS y) SELECT a FROM z AS z; + +-- Derived table inside CTE +WITH x AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM x; +WITH y AS (SELECT a FROM x), x AS (SELECT a FROM y AS y) SELECT a FROM x; + +-- Name conflicts with existing outer derived table +SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS y; +WITH y AS (SELECT c FROM x), y_2 AS (SELECT b FROM y AS y) SELECT a FROM y_2 AS y; + +-- Name conflicts with outer join +SELECT a, b FROM (SELECT c FROM (SELECT d FROM x) AS x) AS y JOIN x ON x.a = y.a; +WITH x_2 AS (SELECT d FROM x), y AS (SELECT c FROM x_2 AS x) SELECT a, b FROM y AS y JOIN x ON x.a = y.a; + +-- Name conflicts with table name that is selected in another branch +SELECT * FROM (SELECT * FROM (SELECT a FROM x) AS x) AS y JOIN (SELECT * FROM x) AS z ON x.a = y.a; +WITH x_2 AS (SELECT a FROM x), y AS (SELECT * FROM x_2 AS x), z AS (SELECT * FROM x) SELECT * FROM y AS y JOIN z AS z ON x.a = y.a; + +-- Name conflicts with table alias +SELECT a FROM (SELECT a FROM (SELECT a FROM x) AS y) AS z JOIN q AS y; +WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z JOIN q AS y; + +-- Name conflicts with existing CTE +WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y; +WITH y_2 AS (SELECT a FROM x), y AS (SELECT a FROM y_2 AS y) SELECT a FROM y; + +-- Union +SELECT 1 AS x, 2 AS y UNION ALL SELECT 1 AS x, 2 AS y; +WITH cte AS (SELECT 1 AS x, 2 AS y) SELECT cte.x AS x, cte.y AS y FROM cte AS cte UNION ALL SELECT cte.x AS x, cte.y AS y FROM cte AS cte; + +-- Union of selects with derived tables +(SELECT a FROM (SELECT b FROM x)) UNION (SELECT a FROM (SELECT b FROM y)); +WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT a FROM cte AS cte), cte_3 AS (SELECT b FROM y), cte_4 AS (SELECT a FROM cte_3 AS cte_3) (SELECT cte_2.a AS a FROM cte_2 AS cte_2) UNION (SELECT cte_4.a AS a FROM cte_4 AS cte_4); + +-- Subquery +SELECT a FROM x WHERE b = (SELECT y.c FROM y); +SELECT a FROM x WHERE b = (SELECT y.c FROM y); + +-- Correlated subquery +SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a); +SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a); + +-- Duplicate CTE +SELECT a FROM (SELECT b FROM x) AS y JOIN (SELECT b FROM x) AS z; +WITH y AS (SELECT b FROM x) SELECT a FROM y AS y JOIN y AS z; + +-- Doubly duplicate CTE +SELECT * FROM (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS z JOIN (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS q; +WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x JOIN y AS y) SELECT * FROM z AS z JOIN z AS q; + +-- Another duplicate... +SELECT x.id FROM (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS x JOIN (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS y ON x.id = y.id; +WITH x_2 AS (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) SELECT x.id FROM x_2 AS x JOIN x_2 AS y ON x.id = y.id; + +-- Root subquery +(SELECT * FROM (SELECT * FROM x)) LIMIT 1; +(WITH cte AS (SELECT * FROM x) SELECT * FROM cte AS cte) LIMIT 1; + +-- Existing duplicate CTE +WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y JOIN y AS z; +WITH y AS (SELECT a FROM x) SELECT a FROM y AS y JOIN y AS z; diff --git a/tests/fixtures/optimizer/merge_derived_tables.sql b/tests/fixtures/optimizer/merge_derived_tables.sql deleted file mode 100644 index c5aa7e9..0000000 --- a/tests/fixtures/optimizer/merge_derived_tables.sql +++ /dev/null @@ -1,63 +0,0 @@ --- Simple -SELECT a, b FROM (SELECT a, b FROM x); -SELECT x.a AS a, x.b AS b FROM x AS x; - --- Inner table alias is merged -SELECT a, b FROM (SELECT a, b FROM x AS q) AS r; -SELECT q.a AS a, q.b AS b FROM x AS q; - --- Double nesting -SELECT a, b FROM (SELECT a, b FROM (SELECT a, b FROM x)); -SELECT x.a AS a, x.b AS b FROM x AS x; - --- WHERE clause is merged -SELECT a, SUM(b) FROM (SELECT a, b FROM x WHERE a > 1) GROUP BY a; -SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a; - --- Outer query has join -SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; -SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; - --- Join on derived table -SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b; -SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; - --- Inner query has a join -SELECT a, c FROM (SELECT a, c FROM x JOIN y ON x.b = y.b); -SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; - --- Inner query has conflicting name in outer query -SELECT a, c FROM (SELECT q.a, q.b FROM x AS q) AS x JOIN y AS q ON x.b = q.b; -SELECT q_2.a AS a, q.c AS c FROM x AS q_2 JOIN y AS q ON q_2.b = q.b; - --- Inner query has conflicting name in joined source -SELECT x.a, q.c FROM (SELECT a, x.b FROM x JOIN y AS q ON x.b = q.b) AS x JOIN y AS q ON x.b = q.b; -SELECT x.a AS a, q.c AS c FROM x AS x JOIN y AS q_2 ON x.b = q_2.b JOIN y AS q ON x.b = q.b; - --- Inner query has multiple conflicting names -SELECT x.a, q.c, r.c FROM (SELECT q.a, r.b FROM x AS q JOIN y AS r ON q.b = r.b) AS x JOIN y AS q ON x.b = q.b JOIN y AS r ON x.b = r.b; -SELECT q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2.b JOIN y AS q ON r_2.b = q.b JOIN y AS r ON r_2.b = r.b; - --- Inner queries have conflicting names with each other -SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b; -SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b; - --- WHERE clause in joined derived table is merged -SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y; -SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y WHERE y.c > 1; - --- WHERE clause in outer joined derived table is merged to ON clause -SELECT x.a, y.c FROM x LEFT JOIN (SELECT b, c FROM y WHERE c > 1) AS y; -SELECT x.a AS a, y.c AS c FROM x AS x LEFT JOIN y AS y ON y.c > 1; - --- Comma JOIN in outer query -SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y; -SELECT x.a AS a, y.c AS c FROM x AS x, y AS y; - --- Comma JOIN in inner query -SELECT x.a, x.c FROM (SELECT x.a, z.c FROM x, y AS z) AS x; -SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z; - --- (Regression) Column in ORDER BY -SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1; -SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1; diff --git a/tests/fixtures/optimizer/merge_subqueries.sql b/tests/fixtures/optimizer/merge_subqueries.sql new file mode 100644 index 0000000..35aed3b --- /dev/null +++ b/tests/fixtures/optimizer/merge_subqueries.sql @@ -0,0 +1,99 @@ +-- Simple +SELECT a, b FROM (SELECT a, b FROM x); +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- Inner table alias is merged +SELECT a, b FROM (SELECT a, b FROM x AS q) AS r; +SELECT q.a AS a, q.b AS b FROM x AS q; + +-- Double nesting +SELECT a, b FROM (SELECT a, b FROM (SELECT a, b FROM x)); +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- WHERE clause is merged +SELECT a, SUM(b) FROM (SELECT a, b FROM x WHERE a > 1) GROUP BY a; +SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a; + +-- Outer query has join +SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; + +-- Outer query has join +SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; + +# leave_tables_isolated: true +SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM (SELECT x.a AS a, x.b AS b FROM x AS x WHERE x.a > 1) AS x JOIN y AS y ON x.b = y.b; + +-- Join on derived table +SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; + +-- Inner query has a join +SELECT a, c FROM (SELECT a, c FROM x JOIN y ON x.b = y.b); +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; + +-- Inner query has conflicting name in outer query +SELECT a, c FROM (SELECT q.a, q.b FROM x AS q) AS x JOIN y AS q ON x.b = q.b; +SELECT q_2.a AS a, q.c AS c FROM x AS q_2 JOIN y AS q ON q_2.b = q.b; + +-- Inner query has conflicting name in joined source +SELECT x.a, q.c FROM (SELECT a, x.b FROM x JOIN y AS q ON x.b = q.b) AS x JOIN y AS q ON x.b = q.b; +SELECT x.a AS a, q.c AS c FROM x AS x JOIN y AS q_2 ON x.b = q_2.b JOIN y AS q ON x.b = q.b; + +-- Inner query has multiple conflicting names +SELECT x.a, q.c, r.c FROM (SELECT q.a, r.b FROM x AS q JOIN y AS r ON q.b = r.b) AS x JOIN y AS q ON x.b = q.b JOIN y AS r ON x.b = r.b; +SELECT q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2.b JOIN y AS q ON r_2.b = q.b JOIN y AS r ON r_2.b = r.b; + +-- Inner queries have conflicting names with each other +SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b; +SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b; + +-- WHERE clause in joined derived table is merged to ON clause +SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON y.c > 1; + +-- Comma JOIN in outer query +SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y; +SELECT x.a AS a, y.c AS c FROM x AS x, y AS y; + +-- Comma JOIN in inner query +SELECT x.a, x.c FROM (SELECT x.a, z.c FROM x, y AS z) AS x; +SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z; + +-- (Regression) Column in ORDER BY +SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1; +SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1; + +-- CTE +WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x; +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- CTE with outer table alias +WITH y AS (SELECT a, b FROM x) SELECT a, b FROM y AS z; +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- Nested CTE +WITH x AS (SELECT a FROM x), x2 AS (SELECT a FROM x) SELECT a FROM x2; +SELECT x.a AS a FROM x AS x; + +-- CTE WHERE clause is merged +WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, SUM(b) FROM x GROUP BY a; +SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a; + +-- CTE Outer query has join +WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, c FROM x AS x JOIN y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; + +-- CTE with inner table alias +WITH y AS (SELECT a, b FROM x AS q) SELECT a, b FROM y AS z; +SELECT q.a AS a, q.b AS b FROM x AS q; + +-- Duplicate queries to CTE +WITH x AS (SELECT a, b FROM x) SELECT x.a, y.b FROM x JOIN x AS y; +WITH x AS (SELECT x.a AS a, x.b AS b FROM x AS x) SELECT x.a AS a, y.b AS b FROM x JOIN x AS y; + +-- Nested CTE +SELECT * FROM (WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x); +SELECT x.a AS a, x.b AS b FROM x AS x; diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index f1d0f7d..0bb742b 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -65,18 +65,14 @@ WITH "cte1" AS ( SELECT "x"."a" AS "a" FROM "x" AS "x" -), "cte2" AS ( - SELECT - "cte1"."a" + 1 AS "a" - FROM "cte1" ) SELECT "cte1"."a" AS "a" FROM "cte1" UNION ALL SELECT - "cte2"."a" AS "a" -FROM "cte2"; + "cte1"."a" + 1 AS "a" +FROM "cte1"; SELECT a, SUM(b) FROM ( @@ -86,18 +82,19 @@ FROM ( ) d WHERE (TRUE AND TRUE OR 'a' = 'b') AND a > 1 GROUP BY a; -SELECT - "x"."a" AS "a", - SUM("y"."b") AS "_col_1" -FROM "x" AS "x" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT MAX("y"."b") AS "_col_0", "y"."a" AS "_u_1" FROM "y" AS "y" GROUP BY "y"."a" -) AS "_u_0" +) +SELECT + "x"."a" AS "a", + SUM("y"."b") AS "_col_1" +FROM "x" AS "x" +LEFT JOIN "_u_0" AS "_u_0" ON "x"."a" = "_u_0"."_u_1" JOIN "y" AS "y" ON "x"."a" = "y"."a" @@ -127,3 +124,16 @@ LIMIT 1; FROM "y" AS "y" ) LIMIT 1; + +# dialect: spark +SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b; +SELECT /*+ BROADCAST(`y`) */ + `x`.`b` AS `b` +FROM `x` AS `x` +JOIN `y` AS `y` + ON `x`.`b` = `y`.`b`; + +SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x; +SELECT + AGGREGATE(ARRAY("x"."a", "x"."b"), 0, ("x", "acc") -> "x" + "acc" + "x"."a") AS "sum_agg" +FROM "x" AS "x"; diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 004c57c..f848e7a 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -69,6 +69,9 @@ SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x SELECT x.b, x.a FROM x LEFT JOIN y ON x.b = y.b QUALIFY ROW_NUMBER() OVER(PARTITION BY x.b ORDER BY x.a DESC) = 1; SELECT x.b AS b, x.a AS a FROM x AS x LEFT JOIN y AS y ON x.b = y.b QUALIFY ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a DESC) = 1; +SELECT AGGREGATE(ARRAY(a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x; +SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + x.a) AS sum_agg FROM x AS x; + -------------------------------------- -- Derived tables -------------------------------------- @@ -231,3 +234,10 @@ SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b WHERE COALES SELECT b FROM x JOIN y USING (b) JOIN z USING (b); SELECT COALESCE(x.b, y.b, z.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b; + +-------------------------------------- +-- Hint with table reference +-------------------------------------- +# dialect: spark +SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b; +SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; diff --git a/tests/fixtures/optimizer/qualify_columns__invalid.sql b/tests/fixtures/optimizer/qualify_columns__invalid.sql index 056b0e9..1104b6e 100644 --- a/tests/fixtures/optimizer/qualify_columns__invalid.sql +++ b/tests/fixtures/optimizer/qualify_columns__invalid.sql @@ -5,7 +5,6 @@ SELECT z.* FROM x; SELECT x FROM x; INSERT INTO x VALUES (1, 2); SELECT a FROM x AS z JOIN y AS z; -WITH z AS (SELECT * FROM x) SELECT * FROM x AS z; SELECT a FROM x JOIN (SELECT b FROM y WHERE y.b = x.c); SELECT a FROM x AS y JOIN (SELECT a FROM y) AS q ON y.a = q.a; SELECT q.a FROM (SELECT x.b FROM x) AS z JOIN (SELECT a FROM z) AS q ON z.b = q.a; diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index 0b6d382..d2f10fc 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -97,19 +97,32 @@ order by p_partkey limit 100; -WITH "_e_0" AS ( +WITH "partsupp_2" AS ( SELECT "partsupp"."ps_partkey" AS "ps_partkey", "partsupp"."ps_suppkey" AS "ps_suppkey", "partsupp"."ps_supplycost" AS "ps_supplycost" FROM "partsupp" AS "partsupp" -), "_e_1" AS ( +), "region_2" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' +), "_u_0" AS ( + SELECT + MIN("partsupp"."ps_supplycost") AS "_col_0", + "partsupp"."ps_partkey" AS "_u_1" + FROM "partsupp_2" AS "partsupp" + CROSS JOIN "region_2" AS "region" + JOIN "nation" AS "nation" + ON "nation"."n_regionkey" = "region"."r_regionkey" + JOIN "supplier" AS "supplier" + ON "supplier"."s_nationkey" = "nation"."n_nationkey" + AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" + GROUP BY + "partsupp"."ps_partkey" ) SELECT "supplier"."s_acctbal" AS "s_acctbal", @@ -121,25 +134,12 @@ SELECT "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM "part" AS "part" -LEFT JOIN ( - SELECT - MIN("partsupp"."ps_supplycost") AS "_col_0", - "partsupp"."ps_partkey" AS "_u_1" - FROM "_e_0" AS "partsupp" - CROSS JOIN "_e_1" AS "region" - JOIN "nation" AS "nation" - ON "nation"."n_regionkey" = "region"."r_regionkey" - JOIN "supplier" AS "supplier" - ON "supplier"."s_nationkey" = "nation"."n_nationkey" - AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" - GROUP BY - "partsupp"."ps_partkey" -) AS "_u_0" +LEFT JOIN "_u_0" AS "_u_0" ON "part"."p_partkey" = "_u_0"."_u_1" -CROSS JOIN "_e_1" AS "region" +CROSS JOIN "region_2" AS "region" JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" -JOIN "_e_0" AS "partsupp" +JOIN "partsupp_2" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" JOIN "supplier" AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" @@ -193,12 +193,12 @@ SELECT FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" + AND "orders"."o_orderdate" < '1995-03-15' JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" + AND "lineitem"."l_shipdate" > '1995-03-15' WHERE "customer"."c_mktsegment" = 'BUILDING' - AND "lineitem"."l_shipdate" > '1995-03-15' - AND "orders"."o_orderdate" < '1995-03-15' GROUP BY "lineitem"."l_orderkey", "orders"."o_orderdate", @@ -232,11 +232,7 @@ group by o_orderpriority order by o_orderpriority; -SELECT - "orders"."o_orderpriority" AS "o_orderpriority", - COUNT(*) AS "order_count" -FROM "orders" AS "orders" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT "lineitem"."l_orderkey" AS "l_orderkey" FROM "lineitem" AS "lineitem" @@ -244,7 +240,12 @@ LEFT JOIN ( "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" GROUP BY "lineitem"."l_orderkey" -) AS "_u_0" +) +SELECT + "orders"."o_orderpriority" AS "o_orderpriority", + COUNT(*) AS "order_count" +FROM "orders" AS "orders" +LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "orders"."o_orderkey" WHERE "orders"."o_orderdate" < CAST('1993-10-01' AS DATE) @@ -290,7 +291,10 @@ SELECT FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -CROSS JOIN "region" AS "region" + AND "orders"."o_orderdate" < CAST('1995-01-01' AS DATE) + AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE) +JOIN "region" AS "region" + ON "region"."r_name" = 'ASIA' JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "supplier" AS "supplier" @@ -299,10 +303,6 @@ JOIN "supplier" AS "supplier" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_suppkey" = "supplier"."s_suppkey" -WHERE - "orders"."o_orderdate" < CAST('1995-01-01' AS DATE) - AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE) - AND "region"."r_name" = 'ASIA' GROUP BY "nation"."n_name" ORDER BY @@ -371,7 +371,7 @@ order by supp_nation, cust_nation, l_year; -WITH "_e_0" AS ( +WITH "n1" AS ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_name" AS "n_name" @@ -389,14 +389,15 @@ SELECT )) AS "revenue" FROM "supplier" AS "supplier" JOIN "lineitem" AS "lineitem" - ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" + ON "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) + AND "supplier"."s_suppkey" = "lineitem"."l_suppkey" JOIN "orders" AS "orders" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" JOIN "customer" AS "customer" ON "customer"."c_custkey" = "orders"."o_custkey" -JOIN "_e_0" AS "n1" +JOIN "n1" AS "n1" ON "supplier"."s_nationkey" = "n1"."n_nationkey" -JOIN "_e_0" AS "n2" +JOIN "n1" AS "n2" ON "customer"."c_nationkey" = "n2"."n_nationkey" AND ( "n1"."n_name" = 'FRANCE' @@ -406,8 +407,6 @@ JOIN "_e_0" AS "n2" "n1"."n_name" = 'GERMANY' OR "n2"."n_name" = 'GERMANY' ) -WHERE - "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) GROUP BY "n1"."n_name", "n2"."n_name", @@ -469,13 +468,15 @@ SELECT 1 - "lineitem"."l_discount" )) AS "mkt_share" FROM "part" AS "part" -CROSS JOIN "region" AS "region" +JOIN "region" AS "region" + ON "region"."r_name" = 'AMERICA' JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "customer" AS "customer" ON "customer"."c_nationkey" = "nation"."n_nationkey" JOIN "orders" AS "orders" ON "orders"."o_custkey" = "customer"."c_custkey" + AND "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "part"."p_partkey" = "lineitem"."l_partkey" @@ -484,9 +485,7 @@ JOIN "supplier" AS "supplier" JOIN "nation" AS "nation_2" ON "supplier"."s_nationkey" = "nation_2"."n_nationkey" WHERE - "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) - AND "part"."p_type" = 'ECONOMY ANODIZED STEEL' - AND "region"."r_name" = 'AMERICA' + "part"."p_type" = 'ECONOMY ANODIZED STEEL' GROUP BY EXTRACT(year FROM "orders"."o_orderdate") ORDER BY @@ -604,14 +603,13 @@ SELECT FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" + AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE) + AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE) JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" + AND "lineitem"."l_returnflag" = 'R' JOIN "nation" AS "nation" ON "customer"."c_nationkey" = "nation"."n_nationkey" -WHERE - "lineitem"."l_returnflag" = 'R' - AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE) - AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE) GROUP BY "customer"."c_custkey", "customer"."c_name", @@ -654,12 +652,12 @@ group by ) order by value desc; -WITH "_e_0" AS ( +WITH "supplier_2" AS ( SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_nationkey" AS "s_nationkey" FROM "supplier" AS "supplier" -), "_e_1" AS ( +), "nation_2" AS ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_name" AS "n_name" @@ -671,9 +669,9 @@ SELECT "partsupp"."ps_partkey" AS "ps_partkey", SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") AS "value" FROM "partsupp" AS "partsupp" -JOIN "_e_0" AS "supplier" +JOIN "supplier_2" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" -JOIN "_e_1" AS "nation" +JOIN "nation_2" AS "nation" ON "supplier"."s_nationkey" = "nation"."n_nationkey" GROUP BY "partsupp"."ps_partkey" @@ -682,9 +680,9 @@ HAVING SELECT SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") * 0.0001 AS "_col_0" FROM "partsupp" AS "partsupp" - JOIN "_e_0" AS "supplier" + JOIN "supplier_2" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" - JOIN "_e_1" AS "nation" + JOIN "nation_2" AS "nation" ON "supplier"."s_nationkey" = "nation"."n_nationkey" ) ORDER BY @@ -737,13 +735,12 @@ SELECT END) AS "low_line_count" FROM "orders" AS "orders" JOIN "lineitem" AS "lineitem" - ON "orders"."o_orderkey" = "lineitem"."l_orderkey" -WHERE - "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" + ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" AND "lineitem"."l_receiptdate" < CAST('1995-01-01' AS DATE) AND "lineitem"."l_receiptdate" >= CAST('1994-01-01' AS DATE) AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate" AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP') + AND "orders"."o_orderkey" = "lineitem"."l_orderkey" GROUP BY "lineitem"."l_shipmode" ORDER BY @@ -772,10 +769,7 @@ group by order by custdist desc, c_count desc; -SELECT - "c_orders"."c_count" AS "c_count", - COUNT(*) AS "custdist" -FROM ( +WITH "c_orders" AS ( SELECT COUNT("orders"."o_orderkey") AS "c_count" FROM "customer" AS "customer" @@ -784,7 +778,11 @@ FROM ( AND NOT "orders"."o_comment" LIKE '%special%requests%' GROUP BY "customer"."c_custkey" -) AS "c_orders" +) +SELECT + "c_orders"."c_count" AS "c_count", + COUNT(*) AS "custdist" +FROM "c_orders" AS "c_orders" GROUP BY "c_orders"."c_count" ORDER BY @@ -920,13 +918,7 @@ order by p_brand, p_type, p_size; -SELECT - "part"."p_brand" AS "p_brand", - "part"."p_type" AS "p_type", - "part"."p_size" AS "p_size", - COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt" -FROM "partsupp" AS "partsupp" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT "supplier"."s_suppkey" AS "s_suppkey" FROM "supplier" AS "supplier" @@ -934,15 +926,22 @@ LEFT JOIN ( "supplier"."s_comment" LIKE '%Customer%Complaints%' GROUP BY "supplier"."s_suppkey" -) AS "_u_0" +) +SELECT + "part"."p_brand" AS "p_brand", + "part"."p_type" AS "p_type", + "part"."p_size" AS "p_size", + COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt" +FROM "partsupp" AS "partsupp" +LEFT JOIN "_u_0" AS "_u_0" ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey" JOIN "part" AS "part" - ON "part"."p_partkey" = "partsupp"."ps_partkey" -WHERE - "_u_0"."s_suppkey" IS NULL - AND "part"."p_brand" <> 'Brand#45' + ON "part"."p_brand" <> 'Brand#45' + AND "part"."p_partkey" = "partsupp"."ps_partkey" AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%' +WHERE + "_u_0"."s_suppkey" IS NULL GROUP BY "part"."p_brand", "part"."p_type", @@ -973,24 +972,25 @@ where where l_partkey = p_partkey ); -SELECT - SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly" -FROM "lineitem" AS "lineitem" -JOIN "part" AS "part" - ON "part"."p_partkey" = "lineitem"."l_partkey" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT 0.2 * AVG("lineitem"."l_quantity") AS "_col_0", "lineitem"."l_partkey" AS "_u_1" FROM "lineitem" AS "lineitem" GROUP BY "lineitem"."l_partkey" -) AS "_u_0" +) +SELECT + SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly" +FROM "lineitem" AS "lineitem" +JOIN "part" AS "part" + ON "part"."p_brand" = 'Brand#23' + AND "part"."p_container" = 'MED BOX' + AND "part"."p_partkey" = "lineitem"."l_partkey" +LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "part"."p_partkey" WHERE "lineitem"."l_quantity" < "_u_0"."_col_0" - AND "part"."p_brand" = 'Brand#23' - AND "part"."p_container" = 'MED BOX' AND NOT "_u_0"."_u_1" IS NULL; -------------------------------------- @@ -1030,6 +1030,16 @@ order by o_orderdate limit 100; +WITH "_u_0" AS ( + SELECT + "lineitem"."l_orderkey" AS "l_orderkey" + FROM "lineitem" AS "lineitem" + GROUP BY + "lineitem"."l_orderkey", + "lineitem"."l_orderkey" + HAVING + SUM("lineitem"."l_quantity") > 300 +) SELECT "customer"."c_name" AS "c_name", "customer"."c_custkey" AS "c_custkey", @@ -1040,16 +1050,7 @@ SELECT FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -LEFT JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey" - FROM "lineitem" AS "lineitem" - GROUP BY - "lineitem"."l_orderkey", - "lineitem"."l_orderkey" - HAVING - SUM("lineitem"."l_quantity") > 300 -) AS "_u_0" +LEFT JOIN "_u_0" AS "_u_0" ON "orders"."o_orderkey" = "_u_0"."l_orderkey" JOIN "lineitem" AS "lineitem" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" @@ -1200,38 +1201,34 @@ where and n_name = 'CANADA' order by s_name; -SELECT - "supplier"."s_name" AS "s_name", - "supplier"."s_address" AS "s_address" -FROM "supplier" AS "supplier" -LEFT JOIN ( +WITH "_u_0" AS ( + SELECT + 0.5 * SUM("lineitem"."l_quantity") AS "_col_0", + "lineitem"."l_partkey" AS "_u_1", + "lineitem"."l_suppkey" AS "_u_2" + FROM "lineitem" AS "lineitem" + WHERE + "lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE) + AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE) + GROUP BY + "lineitem"."l_partkey", + "lineitem"."l_suppkey" +), "_u_3" AS ( + SELECT + "part"."p_partkey" AS "p_partkey" + FROM "part" AS "part" + WHERE + "part"."p_name" LIKE 'forest%' + GROUP BY + "part"."p_partkey" +), "_u_4" AS ( SELECT "partsupp"."ps_suppkey" AS "ps_suppkey" FROM "partsupp" AS "partsupp" - LEFT JOIN ( - SELECT - 0.5 * SUM("lineitem"."l_quantity") AS "_col_0", - "lineitem"."l_partkey" AS "_u_1", - "lineitem"."l_suppkey" AS "_u_2" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE) - AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE) - GROUP BY - "lineitem"."l_partkey", - "lineitem"."l_suppkey" - ) AS "_u_0" + LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "partsupp"."ps_partkey" AND "_u_0"."_u_2" = "partsupp"."ps_suppkey" - LEFT JOIN ( - SELECT - "part"."p_partkey" AS "p_partkey" - FROM "part" AS "part" - WHERE - "part"."p_name" LIKE 'forest%' - GROUP BY - "part"."p_partkey" - ) AS "_u_3" + LEFT JOIN "_u_3" AS "_u_3" ON "partsupp"."ps_partkey" = "_u_3"."p_partkey" WHERE "partsupp"."ps_availqty" > "_u_0"."_col_0" @@ -1240,13 +1237,18 @@ LEFT JOIN ( AND NOT "_u_3"."p_partkey" IS NULL GROUP BY "partsupp"."ps_suppkey" -) AS "_u_4" +) +SELECT + "supplier"."s_name" AS "s_name", + "supplier"."s_address" AS "s_address" +FROM "supplier" AS "supplier" +LEFT JOIN "_u_4" AS "_u_4" ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey" JOIN "nation" AS "nation" - ON "supplier"."s_nationkey" = "nation"."n_nationkey" + ON "nation"."n_name" = 'CANADA' + AND "supplier"."s_nationkey" = "nation"."n_nationkey" WHERE - "nation"."n_name" = 'CANADA' - AND NOT "_u_4"."ps_suppkey" IS NULL + NOT "_u_4"."ps_suppkey" IS NULL ORDER BY "s_name"; @@ -1294,22 +1296,14 @@ order by s_name limit 100; -SELECT - "supplier"."s_name" AS "s_name", - COUNT(*) AS "numwait" -FROM "supplier" AS "supplier" -JOIN "lineitem" AS "lineitem" - ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT "l2"."l_orderkey" AS "l_orderkey", ARRAY_AGG("l2"."l_suppkey") AS "_u_1" FROM "lineitem" AS "l2" GROUP BY "l2"."l_orderkey" -) AS "_u_0" - ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey" -LEFT JOIN ( +), "_u_2" AS ( SELECT "l3"."l_orderkey" AS "l_orderkey", ARRAY_AGG("l3"."l_suppkey") AS "_u_3" @@ -1318,20 +1312,29 @@ LEFT JOIN ( "l3"."l_receiptdate" > "l3"."l_commitdate" GROUP BY "l3"."l_orderkey" -) AS "_u_2" +) +SELECT + "supplier"."s_name" AS "s_name", + COUNT(*) AS "numwait" +FROM "supplier" AS "supplier" +JOIN "lineitem" AS "lineitem" + ON "lineitem"."l_receiptdate" > "lineitem"."l_commitdate" + AND "supplier"."s_suppkey" = "lineitem"."l_suppkey" +LEFT JOIN "_u_0" AS "_u_0" + ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey" +LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey" JOIN "orders" AS "orders" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" + AND "orders"."o_orderstatus" = 'F' JOIN "nation" AS "nation" - ON "supplier"."s_nationkey" = "nation"."n_nationkey" + ON "nation"."n_name" = 'SAUDI ARABIA' + AND "supplier"."s_nationkey" = "nation"."n_nationkey" WHERE ( "_u_2"."l_orderkey" IS NULL OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "lineitem"."l_suppkey") ) - AND "lineitem"."l_receiptdate" > "lineitem"."l_commitdate" - AND "nation"."n_name" = 'SAUDI ARABIA' - AND "orders"."o_orderstatus" = 'F' AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "lineitem"."l_suppkey") AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY @@ -1381,18 +1384,19 @@ group by cntrycode order by cntrycode; -SELECT - SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode", - COUNT(*) AS "numcust", - SUM("customer"."c_acctbal") AS "totacctbal" -FROM "customer" AS "customer" -LEFT JOIN ( +WITH "_u_0" AS ( SELECT "orders"."o_custkey" AS "_u_1" FROM "orders" AS "orders" GROUP BY "orders"."o_custkey" -) AS "_u_0" +) +SELECT + SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode", + COUNT(*) AS "numcust", + SUM("customer"."c_acctbal") AS "totacctbal" +FROM "customer" AS "customer" +LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "customer"."c_custkey" WHERE "_u_0"."_u_1" IS NULL diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql index 5ed74f4..19a7451 100644 --- a/tests/fixtures/pretty.sql +++ b/tests/fixtures/pretty.sql @@ -264,22 +264,3 @@ CREATE TABLE "t_customer_account" ( "account_no" VARCHAR(100) ); -CREATE TABLE "t_customer_account" ( - "id" int(11) NOT NULL AUTO_INCREMENT, - "customer_id" int(11) DEFAULT NULL COMMENT '客户id', - "bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', - "account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', - PRIMARY KEY ("id") -) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表'; -CREATE TABLE "t_customer_account" ( - "id" INT(11) NOT NULL AUTO_INCREMENT, - "customer_id" INT(11) DEFAULT NULL COMMENT '客户id', - "bank" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', - "account_no" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', - PRIMARY KEY("id") -) -ENGINE=InnoDB -AUTO_INCREMENT=1 -DEFAULT CHARACTER SET=utf8 -COLLATE=utf8_bin -COMMENT='客户账户表'; diff --git a/tests/test_build.py b/tests/test_build.py index 18c0e47..b5d657c 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -270,7 +270,7 @@ class TestBuild(unittest.TestCase): lambda: parse_one("SELECT * FROM y") .assert_is(exp.Select) .ctas("foo.x", properties={"format": "parquet", "y": "2"}), - "CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y' = '2') AS SELECT * FROM y", + "CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y'='2') AS SELECT * FROM y", "hive", ), (lambda: and_("x=1", "y=1"), "x = 1 AND y = 1"), @@ -308,6 +308,18 @@ class TestBuild(unittest.TestCase): lambda: exp.subquery("select x from tbl UNION select x from bar", "unioned").select("x"), "SELECT x FROM (SELECT x FROM tbl UNION SELECT x FROM bar) AS unioned", ), + ( + lambda: exp.update("tbl", {"x": None, "y": {"x": 1}}), + "UPDATE tbl SET x = NULL, y = MAP('x', 1)", + ), + ( + lambda: exp.update("tbl", {"x": 1}, where="y > 0"), + "UPDATE tbl SET x = 1 WHERE y > 0", + ), + ( + lambda: exp.update("tbl", {"x": 1}, from_="tbl2"), + "UPDATE tbl SET x = 1 FROM tbl2", + ), ]: with self.subTest(sql): self.assertEqual(expression().sql(dialect[0] if dialect else None), sql) diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 64ad02d..cc41307 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -27,6 +27,8 @@ class TestExpressions(unittest.TestCase): parse_one("ROW() OVER (partition BY y)"), ) self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)")) + self.assertEqual(exp.Table(pivots=[]), exp.Table()) + self.assertNotEqual(exp.Table(pivots=[None]), exp.Table()) def test_find(self): expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y") @@ -280,6 +282,19 @@ class TestExpressions(unittest.TestCase): expression.find(exp.Table).replace(parse_one("y")) self.assertEqual(expression.sql(), "SELECT c, b FROM y") + def test_pop(self): + expression = parse_one("SELECT a, b FROM x") + expression.find(exp.Column).pop() + self.assertEqual(expression.sql(), "SELECT b FROM x") + expression.find(exp.Column).pop() + self.assertEqual(expression.sql(), "SELECT FROM x") + expression.pop() + self.assertEqual(expression.sql(), "SELECT FROM x") + + expression = parse_one("WITH x AS (SELECT a FROM x) SELECT * FROM x") + expression.find(exp.With).pop() + self.assertEqual(expression.sql(), "SELECT * FROM x") + def test_walk(self): expression = parse_one("SELECT * FROM (SELECT * FROM x)") self.assertEqual(len(list(expression.walk())), 9) @@ -316,6 +331,7 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("MAX(a)"), exp.Max) self.assertIsInstance(parse_one("MIN(a)"), exp.Min) self.assertIsInstance(parse_one("MONTH(a)"), exp.Month) + self.assertIsInstance(parse_one("POSITION(' ' IN a)"), exp.StrPosition) self.assertIsInstance(parse_one("POW(a, 2)"), exp.Pow) self.assertIsInstance(parse_one("POWER(a, 2)"), exp.Pow) self.assertIsInstance(parse_one("QUANTILE(a, 0.90)"), exp.Quantile) @@ -420,7 +436,7 @@ class TestExpressions(unittest.TestCase): exp.Properties.from_dict( { "FORMAT": "parquet", - "PARTITIONED_BY": [exp.to_identifier("a"), exp.to_identifier("b")], + "PARTITIONED_BY": (exp.to_identifier("a"), exp.to_identifier("b")), "custom": 1, "TABLE_FORMAT": exp.to_identifier("test_format"), "ENGINE": None, @@ -444,4 +460,17 @@ class TestExpressions(unittest.TestCase): ), ) - self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": {"key": "value"}}) + self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object}) + + def test_convert(self): + for value, expected in [ + (1, "1"), + ("1", "'1'"), + (None, "NULL"), + (True, "TRUE"), + ((1, "2", None), "(1, '2', NULL)"), + ([1, "2", None], "ARRAY(1, '2', NULL)"), + ({"x": None}, "MAP('x', NULL)"), + ]: + with self.subTest(value): + self.assertEqual(exp.convert(value).sql(), expected) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 102e141..8d4aecc 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1,9 +1,11 @@ import unittest +from functools import partial -from sqlglot import optimizer, parse_one, table +from sqlglot import exp, optimizer, parse_one, table from sqlglot.errors import OptimizeError +from sqlglot.optimizer.annotate_types import annotate_types from sqlglot.optimizer.schema import MappingSchema, ensure_schema -from sqlglot.optimizer.scope import traverse_scope +from sqlglot.optimizer.scope import build_scope, traverse_scope from tests.helpers import TPCH_SCHEMA, load_sql_fixture_pairs, load_sql_fixtures @@ -27,11 +29,17 @@ class TestOptimizer(unittest.TestCase): } def check_file(self, file, func, pretty=False, **kwargs): - for meta, sql, expected in load_sql_fixture_pairs(f"optimizer/{file}.sql"): + for i, (meta, sql, expected) in enumerate(load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1): dialect = meta.get("dialect") - with self.subTest(sql): + leave_tables_isolated = meta.get("leave_tables_isolated") + + func_kwargs = {**kwargs} + if leave_tables_isolated is not None: + func_kwargs["leave_tables_isolated"] = leave_tables_isolated.lower() in ("true", "1") + + with self.subTest(f"{i}, {sql}"): self.assertEqual( - func(parse_one(sql, read=dialect), **kwargs).sql(pretty=pretty, dialect=dialect), + func(parse_one(sql, read=dialect), **func_kwargs).sql(pretty=pretty, dialect=dialect), expected, ) @@ -123,21 +131,20 @@ class TestOptimizer(unittest.TestCase): optimizer.optimize_joins.optimize_joins, ) - def test_eliminate_subqueries(self): - self.check_file( - "eliminate_subqueries", - optimizer.eliminate_subqueries.eliminate_subqueries, - pretty=True, + def test_merge_subqueries(self): + optimize = partial( + optimizer.optimize, + rules=[ + optimizer.qualify_tables.qualify_tables, + optimizer.qualify_columns.qualify_columns, + optimizer.merge_subqueries.merge_subqueries, + ], ) - def test_merge_derived_tables(self): - def optimize(expression, **kwargs): - expression = optimizer.qualify_tables.qualify_tables(expression) - expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs) - expression = optimizer.merge_derived_tables.merge_derived_tables(expression) - return expression + self.check_file("merge_subqueries", optimize, schema=self.schema) - self.check_file("merge_derived_tables", optimize, schema=self.schema) + def test_eliminate_subqueries(self): + self.check_file("eliminate_subqueries", optimizer.eliminate_subqueries.eliminate_subqueries) def test_tpch(self): self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True) @@ -257,17 +264,73 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') ON s.b = r.b WHERE s.b > (SELECT MAX(x.a) FROM x WHERE x.b = s.b) """ - scopes = traverse_scope(parse_one(sql)) - self.assertEqual(len(scopes), 5) - self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x") - self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y") - self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b") - self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y") - self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql()) - - self.assertEqual(set(scopes[4].sources), {"q", "r", "s"}) - self.assertEqual(len(scopes[4].columns), 6) - self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"}) - self.assertEqual(scopes[4].source_columns("q"), []) - self.assertEqual(len(scopes[4].source_columns("r")), 2) - self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"}) + for scopes in traverse_scope(parse_one(sql)), list(build_scope(parse_one(sql)).traverse()): + self.assertEqual(len(scopes), 5) + self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x") + self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y") + self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b") + self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y") + self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql()) + + self.assertEqual(set(scopes[4].sources), {"q", "r", "s"}) + self.assertEqual(len(scopes[4].columns), 6) + self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"}) + self.assertEqual(scopes[4].source_columns("q"), []) + self.assertEqual(len(scopes[4].source_columns("r")), 2) + self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"}) + + def test_literal_type_annotation(self): + tests = { + "SELECT 5": exp.DataType.Type.INT, + "SELECT 5.3": exp.DataType.Type.DOUBLE, + "SELECT 'bla'": exp.DataType.Type.VARCHAR, + "5": exp.DataType.Type.INT, + "5.3": exp.DataType.Type.DOUBLE, + "'bla'": exp.DataType.Type.VARCHAR, + } + + for sql, target_type in tests.items(): + expression = parse_one(sql) + annotated_expression = annotate_types(expression) + + self.assertEqual(annotated_expression.find(exp.Literal).type, target_type) + + def test_boolean_type_annotation(self): + tests = { + "SELECT TRUE": exp.DataType.Type.BOOLEAN, + "FALSE": exp.DataType.Type.BOOLEAN, + } + + for sql, target_type in tests.items(): + expression = parse_one(sql) + annotated_expression = annotate_types(expression) + + self.assertEqual(annotated_expression.find(exp.Boolean).type, target_type) + + def test_cast_type_annotation(self): + expression = parse_one("CAST('2020-01-01' AS TIMESTAMPTZ(9))") + annotate_types(expression) + + self.assertEqual(expression.type, exp.DataType.Type.TIMESTAMPTZ) + self.assertEqual(expression.this.type, exp.DataType.Type.VARCHAR) + self.assertEqual(expression.args["to"].type, exp.DataType.Type.TIMESTAMPTZ) + self.assertEqual(expression.args["to"].expressions[0].type, exp.DataType.Type.INT) + + def test_cache_annotation(self): + expression = parse_one("CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1") + annotated_expression = annotate_types(expression) + + self.assertEqual(annotated_expression.expression.expressions[0].type, exp.DataType.Type.INT) + + def test_binary_annotation(self): + expression = parse_one("SELECT 0.0 + (2 + 3)") + annotate_types(expression) + + expression = expression.expressions[0] + + self.assertEqual(expression.type, exp.DataType.Type.DOUBLE) + self.assertEqual(expression.left.type, exp.DataType.Type.DOUBLE) + self.assertEqual(expression.right.type, exp.DataType.Type.INT) + self.assertEqual(expression.right.this.type, exp.DataType.Type.INT) + self.assertEqual(expression.right.this.left.type, exp.DataType.Type.INT) + self.assertEqual(expression.right.this.right.type, exp.DataType.Type.INT) diff --git a/tests/test_parser.py b/tests/test_parser.py index 9e430e2..4c46531 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,6 +21,11 @@ class TestParser(unittest.TestCase): self.assertIsNotNone(parse_one("date").find(exp.Column)) + def test_float(self): + self.assertEqual(parse_one(".2"), parse_one("0.2")) + self.assertEqual(parse_one("int 1"), parse_one("CAST(1 AS INT)")) + self.assertEqual(parse_one("int.5"), parse_one("CAST(0.5 AS INT)")) + def test_table(self): tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)] self.assertEqual(tables, ["a", "b.c", "d"]) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 2030109..1928d2c 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -6,11 +6,32 @@ from sqlglot.transforms import unalias_group class TestTime(unittest.TestCase): def validate(self, transform, sql, target): - self.assertEqual(parse_one(sql).transform(transform).sql(), target) + with self.subTest(sql): + self.assertEqual(parse_one(sql).transform(transform).sql(), target) def test_unalias_group(self): self.validate( unalias_group, "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4", - "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, 2, x.c, 4", + "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4", + ) + self.validate( + unalias_group, + "SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), aliased_last", + "SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), 3", + ) + self.validate( + unalias_group, + "SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))", + "SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))", + ) + self.validate( + unalias_group, + "SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY new_date", + "SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY 1", + ) + self.validate( + unalias_group, + "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date", + "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date", ) -- cgit v1.2.3