From 90150543f9314be683d22a16339effd774192f6d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 22 Sep 2022 06:31:28 +0200 Subject: Merging upstream version 6.1.1. Signed-off-by: Daniel Baumann --- tests/dialects/test_dialect.py | 133 +++- tests/dialects/test_hive.py | 15 + tests/dialects/test_mysql.py | 52 +- tests/dialects/test_postgres.py | 93 ++- tests/dialects/test_redshift.py | 64 ++ tests/dialects/test_snowflake.py | 32 + tests/dialects/test_sqlite.py | 18 + tests/fixtures/identity.sql | 7 + tests/fixtures/optimizer/merge_derived_tables.sql | 63 ++ tests/fixtures/optimizer/optimizer.sql | 57 +- tests/fixtures/optimizer/tpc-h/tpc-h.sql | 761 +++++----------------- tests/helpers.py | 8 +- tests/test_build.py | 127 +--- tests/test_executor.py | 20 +- tests/test_expressions.py | 53 +- tests/test_optimizer.py | 33 +- tests/test_parser.py | 37 +- tests/test_transpile.py | 51 +- 18 files changed, 749 insertions(+), 875 deletions(-) create mode 100644 tests/dialects/test_redshift.py create mode 100644 tests/fixtures/optimizer/merge_derived_tables.sql (limited to 'tests') diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 3993565..6b7bfd3 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -36,9 +36,7 @@ class Validator(unittest.TestCase): for read_dialect, read_sql in (read or {}).items(): with self.subTest(f"{read_dialect} -> {sql}"): self.assertEqual( - parse_one(read_sql, read_dialect).sql( - self.dialect, unsupported_level=ErrorLevel.IGNORE - ), + parse_one(read_sql, read_dialect).sql(self.dialect, unsupported_level=ErrorLevel.IGNORE), sql, ) @@ -46,9 +44,7 @@ class Validator(unittest.TestCase): with self.subTest(f"{sql} -> {write_dialect}"): if write_sql is UnsupportedError: with self.assertRaises(UnsupportedError): - expression.sql( - write_dialect, unsupported_level=ErrorLevel.RAISE - ) + expression.sql(write_dialect, unsupported_level=ErrorLevel.RAISE) else: self.assertEqual( expression.sql( @@ -82,11 +78,19 @@ class TestDialect(Validator): "oracle": "CAST(a AS CLOB)", "postgres": "CAST(a AS TEXT)", "presto": "CAST(a AS VARCHAR)", + "redshift": "CAST(a AS TEXT)", "snowflake": "CAST(a AS TEXT)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS STRING)", }, ) + self.validate_all( + "CAST(a AS DATETIME)", + write={ + "postgres": "CAST(a AS TIMESTAMP)", + "sqlite": "CAST(a AS DATETIME)", + }, + ) self.validate_all( "CAST(a AS STRING)", write={ @@ -97,6 +101,7 @@ class TestDialect(Validator): "oracle": "CAST(a AS CLOB)", "postgres": "CAST(a AS TEXT)", "presto": "CAST(a AS VARCHAR)", + "redshift": "CAST(a AS TEXT)", "snowflake": "CAST(a AS TEXT)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS STRING)", @@ -112,6 +117,7 @@ class TestDialect(Validator): "oracle": "CAST(a AS VARCHAR2)", "postgres": "CAST(a AS VARCHAR)", "presto": "CAST(a AS VARCHAR)", + "redshift": "CAST(a AS VARCHAR)", "snowflake": "CAST(a AS VARCHAR)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS VARCHAR)", @@ -127,6 +133,7 @@ class TestDialect(Validator): "oracle": "CAST(a AS VARCHAR2(3))", "postgres": "CAST(a AS VARCHAR(3))", "presto": "CAST(a AS VARCHAR(3))", + "redshift": "CAST(a AS VARCHAR(3))", "snowflake": "CAST(a AS VARCHAR(3))", "spark": "CAST(a AS VARCHAR(3))", "starrocks": "CAST(a AS VARCHAR(3))", @@ -142,12 +149,26 @@ class TestDialect(Validator): "oracle": "CAST(a AS NUMBER)", "postgres": "CAST(a AS SMALLINT)", "presto": "CAST(a AS SMALLINT)", + "redshift": "CAST(a AS SMALLINT)", "snowflake": "CAST(a AS SMALLINT)", 
"spark": "CAST(a AS SHORT)", "sqlite": "CAST(a AS INTEGER)", "starrocks": "CAST(a AS SMALLINT)", }, ) + self.validate_all( + "TRY_CAST(a AS DOUBLE)", + read={ + "postgres": "CAST(a AS DOUBLE PRECISION)", + "redshift": "CAST(a AS DOUBLE PRECISION)", + }, + write={ + "duckdb": "TRY_CAST(a AS DOUBLE)", + "postgres": "CAST(a AS DOUBLE PRECISION)", + "redshift": "CAST(a AS DOUBLE PRECISION)", + }, + ) + self.validate_all( "CAST(a AS DOUBLE)", write={ @@ -159,16 +180,32 @@ class TestDialect(Validator): "oracle": "CAST(a AS DOUBLE PRECISION)", "postgres": "CAST(a AS DOUBLE PRECISION)", "presto": "CAST(a AS DOUBLE)", + "redshift": "CAST(a AS DOUBLE PRECISION)", "snowflake": "CAST(a AS DOUBLE)", "spark": "CAST(a AS DOUBLE)", "starrocks": "CAST(a AS DOUBLE)", }, ) self.validate_all( - "CAST(a AS TIMESTAMP)", write={"starrocks": "CAST(a AS DATETIME)"} + "CAST('1 DAY' AS INTERVAL)", + write={ + "postgres": "CAST('1 DAY' AS INTERVAL)", + "redshift": "CAST('1 DAY' AS INTERVAL)", + }, ) self.validate_all( - "CAST(a AS TIMESTAMPTZ)", write={"starrocks": "CAST(a AS DATETIME)"} + "CAST(a AS TIMESTAMP)", + write={ + "starrocks": "CAST(a AS DATETIME)", + "redshift": "CAST(a AS TIMESTAMP)", + }, + ) + self.validate_all( + "CAST(a AS TIMESTAMPTZ)", + write={ + "starrocks": "CAST(a AS DATETIME)", + "redshift": "CAST(a AS TIMESTAMPTZ)", + }, ) self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS NUMBER)"}) self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS NUMBER)"}) @@ -552,6 +589,7 @@ class TestDialect(Validator): write={ "bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "oracle": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", @@ -566,6 +604,7 @@ class TestDialect(Validator): "presto": "JSON_EXTRACT(x, 'y')", }, write={ + "oracle": "JSON_EXTRACT(x, 'y')", "postgres": "x->'y'", "presto": "JSON_EXTRACT(x, 'y')", }, @@ -623,6 +662,37 @@ class TestDialect(Validator): }, ) + # https://dev.mysql.com/doc/refman/8.0/en/join.html + # https://www.postgresql.org/docs/current/queries-table-expressions.html + def test_joined_tables(self): + self.validate_identity("SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1)") + self.validate_identity("SELECT * FROM (tbl1 JOIN tbl2 JOIN tbl3)") + self.validate_identity("SELECT * FROM (tbl1 JOIN (tbl2 JOIN tbl3) ON bla = foo)") + self.validate_identity("SELECT * FROM (tbl1 JOIN LATERAL (SELECT * FROM bla) AS tbl)") + + self.validate_all( + "SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1)", + write={ + "postgres": "SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1)", + "mysql": "SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1)", + }, + ) + self.validate_all( + "SELECT * FROM (tbl1 JOIN LATERAL (SELECT * FROM bla) AS tbl)", + write={ + "postgres": "SELECT * FROM (tbl1 JOIN LATERAL (SELECT * FROM bla) AS tbl)", + "mysql": "SELECT * FROM (tbl1 JOIN LATERAL (SELECT * FROM bla) AS tbl)", + }, + ) + + def test_lateral_subquery(self): + self.validate_identity( + "SELECT art FROM tbl1 INNER JOIN LATERAL (SELECT art FROM tbl2) AS tbl2 ON tbl1.art = tbl2.art" + ) + 
self.validate_identity( + "SELECT * FROM tbl AS t LEFT JOIN LATERAL (SELECT * FROM b WHERE b.t_id = t.t_id) AS t ON TRUE" + ) + def test_set_operators(self): self.validate_all( "SELECT * FROM a UNION SELECT * FROM b", @@ -731,6 +801,9 @@ class TestDialect(Validator): ) def test_operators(self): + self.validate_identity("some.column LIKE 'foo' || another.column || 'bar' || LOWER(x)") + self.validate_identity("some.column LIKE 'foo' + another.column + 'bar'") + self.validate_all( "x ILIKE '%y'", read={ @@ -874,16 +947,8 @@ class TestDialect(Validator): "spark": "FILTER(the_array, x -> x > 0)", }, ) - self.validate_all( - "SELECT a AS b FROM x GROUP BY b", - write={ - "duckdb": "SELECT a AS b FROM x GROUP BY b", - "presto": "SELECT a AS b FROM x GROUP BY 1", - "hive": "SELECT a AS b FROM x GROUP BY 1", - "oracle": "SELECT a AS b FROM x GROUP BY 1", - "spark": "SELECT a AS b FROM x GROUP BY 1", - }, - ) + + def test_limit(self): self.validate_all( "SELECT x FROM y LIMIT 10", write={ @@ -915,6 +980,7 @@ class TestDialect(Validator): read={ "clickhouse": '`x` + "y"', "sqlite": '`x` + "y"', + "redshift": '"x" + "y"', }, ) self.validate_all( @@ -977,5 +1043,36 @@ class TestDialect(Validator): "oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))", "postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))", "sqlite": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 TEXT, c2 TEXT(1024))", + "redshift": "CREATE TABLE t (b1 VARBYTE, b2 VARBYTE(1024), c1 TEXT, c2 TEXT(1024))", + }, + ) + + def test_alias(self): + self.validate_all( + "SELECT a AS b FROM x GROUP BY b", + write={ + "duckdb": "SELECT a AS b FROM x GROUP BY b", + "presto": "SELECT a AS b FROM x GROUP BY 1", + "hive": "SELECT a AS b FROM x GROUP BY 1", + "oracle": "SELECT a AS b FROM x GROUP BY 1", + "spark": "SELECT a AS b FROM x GROUP BY 1", + }, + ) + self.validate_all( + "SELECT y x FROM my_table t", + write={ + "hive": "SELECT y AS x FROM my_table AS t", + "oracle": "SELECT y AS x FROM my_table t", + "postgres": "SELECT y AS x FROM my_table AS t", + "sqlite": "SELECT y AS x FROM my_table AS t", + }, + ) + self.validate_all( + "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", + write={ + "hive": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", + "oracle": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 t JOIN cte2 WHERE cte1.a = cte2.c", + "postgres": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", + "sqlite": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", }, ) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index eccd75a..55086e3 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -341,6 +341,21 @@ class TestHive(Validator): "spark": "PERCENTILE(x, 0.5)", }, ) + self.validate_all( + "PERCENTILE_APPROX(x, 0.5)", + read={ + "hive": "PERCENTILE_APPROX(x, 0.5)", + "presto": "APPROX_PERCENTILE(x, 0.5)", + "duckdb": "APPROX_QUANTILE(x, 0.5)", + "spark": "PERCENTILE_APPROX(x, 0.5)", + }, + write={ + "hive": "PERCENTILE_APPROX(x, 0.5)", + "presto": "APPROX_PERCENTILE(x, 0.5)", + "duckdb": "APPROX_QUANTILE(x, 
0.5)", + "spark": "PERCENTILE_APPROX(x, 0.5)", + }, + ) self.validate_all( "APPROX_COUNT_DISTINCT(a)", write={ diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index ee0c5f5..87a3d64 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -15,6 +15,10 @@ class TestMySQL(Validator): def test_identity(self): self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") + self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ')") + self.validate_identity("SELECT TRIM(TRAILING 'bla' FROM ' XXX ')") + self.validate_identity("SELECT TRIM(BOTH 'bla' FROM ' XXX ')") + self.validate_identity("SELECT TRIM('bla' FROM ' XXX ')") def test_introducers(self): self.validate_all( @@ -27,12 +31,22 @@ class TestMySQL(Validator): }, ) - def test_binary_literal(self): + def test_hexadecimal_literal(self): self.validate_all( "SELECT 0xCC", write={ - "mysql": "SELECT b'11001100'", - "spark": "SELECT X'11001100'", + "mysql": "SELECT x'CC'", + "sqlite": "SELECT x'CC'", + "spark": "SELECT X'CC'", + "trino": "SELECT X'CC'", + "bigquery": "SELECT 0xCC", + "oracle": "SELECT 204", + }, + ) + self.validate_all( + "SELECT X'1A'", + write={ + "mysql": "SELECT x'1A'", }, ) self.validate_all( @@ -41,10 +55,22 @@ class TestMySQL(Validator): "mysql": "SELECT `0xz`", }, ) + + def test_bits_literal(self): + self.validate_all( + "SELECT 0b1011", + write={ + "mysql": "SELECT b'1011'", + "postgres": "SELECT b'1011'", + "oracle": "SELECT 11", + }, + ) self.validate_all( - "SELECT 0XCC", + "SELECT B'1011'", write={ - "mysql": "SELECT 0 AS XCC", + "mysql": "SELECT b'1011'", + "postgres": "SELECT b'1011'", + "oracle": "SELECT 11", }, ) @@ -77,3 +103,19 @@ class TestMySQL(Validator): "mysql": "SELECT 1", }, ) + + def test_mysql(self): + self.validate_all( + "GROUP_CONCAT(DISTINCT x ORDER BY y DESC)", + write={ + "mysql": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR ',')", + "sqlite": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC)", + }, + ) + self.validate_all( + "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR '')", + "sqlite": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC, '')", + }, + ) diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 15dbfd0..e0934d7 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -8,9 +8,7 @@ class TestPostgres(Validator): def test_ddl(self): self.validate_all( "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)", - write={ - "postgres": "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)" - }, + write={"postgres": "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)"}, ) self.validate_all( "CREATE TABLE products (product_no INT CONSTRAINT must_be_different UNIQUE, name TEXT CONSTRAINT present NOT NULL, price DECIMAL)", @@ -42,11 +40,17 @@ class TestPostgres(Validator): " CONSTRAINT valid_discount CHECK (price > discounted_price))" }, ) + self.validate_all( + "CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY)", + write={"postgres": "CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY)"}, + ) + self.validate_all( + "CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY)", + write={"postgres": "CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY)"}, + ) with self.assertRaises(ParseError): - transpile( - "CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres" - ) + transpile("CREATE TABLE products (price 
DECIMAL CHECK price > 0)", read="postgres") with self.assertRaises(ParseError): transpile( "CREATE TABLE products (price DECIMAL, CHECK price > 1)", @@ -54,11 +58,16 @@ class TestPostgres(Validator): ) def test_postgres(self): - self.validate_all( - "CREATE TABLE x (a INT SERIAL)", - read={"sqlite": "CREATE TABLE x (a INTEGER AUTOINCREMENT)"}, - write={"sqlite": "CREATE TABLE x (a INTEGER AUTOINCREMENT)"}, - ) + self.validate_identity("SELECT CASE WHEN SUBSTRING('abcdefg') IN ('ab') THEN 1 ELSE 0 END") + self.validate_identity("SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1) IN ('ab') THEN 1 ELSE 0 END") + self.validate_identity("SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1 FOR 2) IN ('ab') THEN 1 ELSE 0 END") + self.validate_identity('SELECT * FROM "x" WHERE SUBSTRING("x"."foo" FROM 1 FOR 2) IN (\'mas\')') + self.validate_identity("SELECT * FROM x WHERE SUBSTRING('Thomas' FROM '...$') IN ('mas')") + self.validate_identity("SELECT * FROM x WHERE SUBSTRING('Thomas' FROM '%#\"o_a#\"_' FOR '#') IN ('mas')") + self.validate_identity("SELECT SUBSTRING('bla' + 'foo' || 'bar' FROM 3 - 1 + 5 FOR 4 + SOME_FUNC(arg1, arg2))") + self.validate_identity("SELECT TRIM(' X' FROM ' XXX ')") + self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ' COLLATE utf8_bin)") + self.validate_all( "CREATE TABLE x (a UUID, b BYTEA)", write={ @@ -91,3 +100,65 @@ class TestPostgres(Validator): "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", }, ) + self.validate_all( + "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1 FOR 2) IN ('ab') THEN 1 ELSE 0 END", + write={ + "hive": "SELECT CASE WHEN SUBSTRING('abcdefg', 1, 2) IN ('ab') THEN 1 ELSE 0 END", + "spark": "SELECT CASE WHEN SUBSTRING('abcdefg', 1, 2) IN ('ab') THEN 1 ELSE 0 END", + }, + ) + self.validate_all( + "SELECT * FROM x WHERE SUBSTRING(col1 FROM 3 + LENGTH(col1) - 10 FOR 10) IN (col2)", + write={ + "hive": "SELECT * FROM x WHERE SUBSTRING(col1, 3 + LENGTH(col1) - 10, 10) IN (col2)", + "spark": "SELECT * FROM x WHERE SUBSTRING(col1, 3 + LENGTH(col1) - 10, 10) IN (col2)", + }, + ) + self.validate_all( + "SELECT SUBSTRING(CAST(2022 AS CHAR(4)) || LPAD(CAST(3 AS CHAR(2)), 2, '0') FROM 3 FOR 4)", + read={ + "postgres": "SELECT SUBSTRING(2022::CHAR(4) || LPAD(3::CHAR(2), 2, '0') FROM 3 FOR 4)", + }, + ) + self.validate_all( + "SELECT TRIM(BOTH ' XXX ')", + write={ + "mysql": "SELECT TRIM(' XXX ')", + "postgres": "SELECT TRIM(' XXX ')", + "hive": "SELECT TRIM(' XXX ')", + }, + ) + self.validate_all( + "TRIM(LEADING FROM ' XXX ')", + write={ + "mysql": "LTRIM(' XXX ')", + "postgres": "LTRIM(' XXX ')", + "hive": "LTRIM(' XXX ')", + "presto": "LTRIM(' XXX ')", + }, + ) + self.validate_all( + "TRIM(TRAILING FROM ' XXX ')", + write={ + "mysql": "RTRIM(' XXX ')", + "postgres": "RTRIM(' XXX ')", + "hive": "RTRIM(' XXX ')", + "presto": "RTRIM(' XXX ')", + }, + ) + self.validate_all( + "SELECT * FROM foo, LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id) AS ss", + read={"postgres": "SELECT * FROM foo, LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id) AS ss"}, + ) + self.validate_all( + "SELECT m.name FROM manufacturers AS m LEFT JOIN LATERAL GET_PRODUCT_NAMES(m.id) AS pname ON TRUE WHERE pname IS NULL", + read={ + "postgres": "SELECT m.name FROM manufacturers AS m LEFT JOIN LATERAL GET_PRODUCT_NAMES(m.id) AS pname ON TRUE WHERE pname IS NULL", + }, + ) + self.validate_all( + "SELECT p1.id, p2.id, v1, v2 FROM polygons AS p1, polygons AS p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE 
(v1 <-> v2) < 10 AND p1.id <> p2.id", + read={ + "postgres": "SELECT p1.id, p2.id, v1, v2 FROM polygons p1, polygons p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE (v1 <-> v2) < 10 AND p1.id != p2.id", + }, + ) diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py new file mode 100644 index 0000000..1ed2bb6 --- /dev/null +++ b/tests/dialects/test_redshift.py @@ -0,0 +1,64 @@ +from tests.dialects.test_dialect import Validator + + +class TestRedshift(Validator): + dialect = "redshift" + + def test_redshift(self): + self.validate_all( + 'create table "group" ("col" char(10))', + write={ + "redshift": 'CREATE TABLE "group" ("col" CHAR(10))', + "mysql": "CREATE TABLE `group` (`col` CHAR(10))", + }, + ) + self.validate_all( + 'create table if not exists city_slash_id("city/id" integer not null, state char(2) not null)', + write={ + "redshift": 'CREATE TABLE IF NOT EXISTS city_slash_id ("city/id" INTEGER NOT NULL, state CHAR(2) NOT NULL)', + "presto": 'CREATE TABLE IF NOT EXISTS city_slash_id ("city/id" INTEGER NOT NULL, state CHAR(2) NOT NULL)', + }, + ) + self.validate_all( + "SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4326;POINT(10 20)')::geography)", + write={ + "redshift": "SELECT ST_ASEWKT(CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", + "bigquery": "SELECT ST_ASEWKT(CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", + }, + ) + self.validate_all( + "SELECT ST_AsEWKT(ST_GeogFromText('LINESTRING(110 40, 2 3, -10 80, -7 9)')::geometry)", + write={ + "redshift": "SELECT ST_ASEWKT(CAST(ST_GEOGFROMTEXT('LINESTRING(110 40, 2 3, -10 80, -7 9)') AS GEOMETRY))", + }, + ) + self.validate_all( + "SELECT 'abc'::BINARY", + write={ + "redshift": "SELECT CAST('abc' AS VARBYTE)", + }, + ) + self.validate_all( + "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", + write={ + "redshift": "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", + }, + ) + self.validate_all( + 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5', + write={ + "redshift": 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\\\_%\' LIMIT 5' + }, + ) + + def test_identity(self): + self.validate_identity("CAST('bla' AS SUPER)") + self.validate_identity("CREATE TABLE real1 (realcol REAL)") + self.validate_identity("CAST('foo' AS HLLSKETCH)") + self.validate_identity("SELECT DATEADD(day, 1, 'today')") + self.validate_identity("'abc' SIMILAR TO '(b|c)%'") + self.validate_identity( + "SELECT caldate + INTERVAL '1 second' AS dateplus FROM date WHERE caldate = '12-31-2008'" + ) + self.validate_identity("CREATE TABLE datetable (start_date DATE, end_date DATE)") + self.validate_identity("SELECT COUNT(*) FROM event WHERE eventname LIKE '%Ring%' OR eventname LIKE '%Die%'") diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 62f78e1..2eeff52 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -143,3 +143,35 @@ class TestSnowflake(Validator): "snowflake": r"SELECT 'a \' \\ \\t \\x21 z $ '", }, ) + + def test_null_treatment(self): + self.validate_all( + r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", + write={ + "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1" + }, + ) + self.validate_all( + r"SELECT FIRST_VALUE(TABLE1.COLUMN1 RESPECT NULLS) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", + write={ + "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1" + }, + ) + self.validate_all( + r"SELECT FIRST_VALUE(TABLE1.COLUMN1) RESPECT NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", + write={ + "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) RESPECT NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1" + }, + ) + self.validate_all( + r"SELECT FIRST_VALUE(TABLE1.COLUMN1 IGNORE NULLS) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", + write={ + "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1 IGNORE NULLS) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1" + }, + ) + self.validate_all( + r"SELECT FIRST_VALUE(TABLE1.COLUMN1) IGNORE NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", + write={ + "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) IGNORE NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1" + }, + ) diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py index a0576de..3cc974c 100644 --- a/tests/dialects/test_sqlite.py +++ b/tests/dialects/test_sqlite.py @@ -34,6 +34,7 @@ class TestSQLite(Validator): write={ "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)", }, ) self.validate_all( @@ -70,3 +71,20 @@ class TestSQLite(Validator): "sqlite": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", }, ) + + def test_hexadecimal_literal(self): + self.validate_all( + "SELECT 0XCC", + write={ + "sqlite": "SELECT x'CC'", + "mysql": "SELECT x'CC'", + }, + ) + + def test_window_null_treatment(self): + self.validate_all( + "SELECT FIRST_VALUE(Name) OVER (PARTITION BY AlbumId ORDER BY Bytes DESC) AS LargestTrack FROM tracks", + write={ + "sqlite": "SELECT FIRST_VALUE(Name) OVER (PARTITION BY AlbumId ORDER BY Bytes DESC) AS LargestTrack FROM tracks" + }, + ) diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql index 40f11a2..1b4168c 100644 --- a/tests/fixtures/identity.sql +++ b/tests/fixtures/identity.sql @@ -318,6 +318,9 @@ SELECT 1 FROM a JOIN b ON a.foo = b.bar JOIN c ON a.foo = c.bar SELECT 1 FROM a LEFT JOIN b ON a.foo = b.bar JOIN c ON a.foo = c.bar SELECT 1 FROM a LEFT INNER JOIN b ON a.foo = b.bar SELECT 1 FROM a LEFT OUTER JOIN b ON a.foo = b.bar +SELECT 1 FROM a NATURAL JOIN b +SELECT 1 FROM a NATURAL LEFT JOIN b +SELECT 1 FROM a NATURAL LEFT OUTER JOIN b SELECT 1 FROM a OUTER JOIN b ON a.foo = b.bar SELECT 1 FROM a FULL JOIN b ON a.foo = b.bar SELECT 1 UNION ALL SELECT 2 @@ -329,6 
+332,7 @@ SELECT 1 AS delete, 2 AS alter SELECT * FROM (x) SELECT * FROM ((x)) SELECT * FROM ((SELECT 1)) +SELECT * FROM (x LATERAL VIEW EXPLODE(y) JOIN foo) SELECT * FROM (SELECT 1) AS x SELECT * FROM (SELECT 1 UNION SELECT 2) AS x SELECT * FROM (SELECT 1 UNION ALL SELECT 2) AS x @@ -430,6 +434,7 @@ CREATE TEMPORARY VIEW x AS SELECT a FROM d CREATE TEMPORARY VIEW IF NOT EXISTS x AS SELECT a FROM d CREATE TEMPORARY VIEW x AS WITH y AS (SELECT 1) SELECT * FROM y CREATE TABLE z (a INT, b VARCHAR, c VARCHAR(100), d DECIMAL(5, 3)) +CREATE TABLE z (end INT) CREATE TABLE z (a ARRAY, b MAP, c DECIMAL(5, 3)) CREATE TABLE z (a INT, b VARCHAR COMMENT 'z', c VARCHAR(100) COMMENT 'z', d DECIMAL(5, 3)) CREATE TABLE z (a INT(11) DEFAULT UUID()) @@ -466,6 +471,7 @@ CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1 CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE LAZY TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a +CACHE TABLE x AS (SELECT 1 AS y) CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2') INSERT OVERWRITE TABLE a.b PARTITION(ds) SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds='YYYY-MM-DD') SELECT x FROM y @@ -512,3 +518,4 @@ SELECT ? AS ? FROM x WHERE b BETWEEN ? AND ? GROUP BY ?, 1 LIMIT ? WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS foo, c.bar AS bar FROM c)) SELECT * FROM a WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z +SELECT ((SELECT 1) + 1) diff --git a/tests/fixtures/optimizer/merge_derived_tables.sql b/tests/fixtures/optimizer/merge_derived_tables.sql new file mode 100644 index 0000000..c5aa7e9 --- /dev/null +++ b/tests/fixtures/optimizer/merge_derived_tables.sql @@ -0,0 +1,63 @@ +-- Simple +SELECT a, b FROM (SELECT a, b FROM x); +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- Inner table alias is merged +SELECT a, b FROM (SELECT a, b FROM x AS q) AS r; +SELECT q.a AS a, q.b AS b FROM x AS q; + +-- Double nesting +SELECT a, b FROM (SELECT a, b FROM (SELECT a, b FROM x)); +SELECT x.a AS a, x.b AS b FROM x AS x; + +-- WHERE clause is merged +SELECT a, SUM(b) FROM (SELECT a, b FROM x WHERE a > 1) GROUP BY a; +SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a; + +-- Outer query has join +SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; + +-- Join on derived table +SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; + +-- Inner query has a join +SELECT a, c FROM (SELECT a, c FROM x JOIN y ON x.b = y.b); +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; + +-- Inner query has conflicting name in outer query +SELECT a, c FROM (SELECT q.a, q.b FROM x AS q) AS x JOIN y AS q ON x.b = q.b; +SELECT q_2.a AS a, q.c AS c FROM x AS q_2 JOIN y AS q ON q_2.b = q.b; + +-- Inner query has conflicting name in joined source +SELECT x.a, q.c FROM (SELECT a, x.b FROM x JOIN y AS q ON x.b = q.b) AS x JOIN y AS q ON x.b = q.b; +SELECT x.a AS a, q.c AS c FROM x AS x JOIN y AS q_2 ON x.b = q_2.b JOIN y AS q ON x.b = q.b; + +-- Inner query has multiple conflicting names +SELECT x.a, q.c, r.c FROM (SELECT q.a, r.b FROM x AS q JOIN y AS r ON q.b = r.b) AS x JOIN y AS q ON x.b = q.b JOIN y AS r ON x.b = r.b; +SELECT 
q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2.b JOIN y AS q ON r_2.b = q.b JOIN y AS r ON r_2.b = r.b; + +-- Inner queries have conflicting names with each other +SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b; +SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b; + +-- WHERE clause in joined derived table is merged +SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y; +SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y WHERE y.c > 1; + +-- WHERE clause in outer joined derived table is merged to ON clause +SELECT x.a, y.c FROM x LEFT JOIN (SELECT b, c FROM y WHERE c > 1) AS y; +SELECT x.a AS a, y.c AS c FROM x AS x LEFT JOIN y AS y ON y.c > 1; + +-- Comma JOIN in outer query +SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y; +SELECT x.a AS a, y.c AS c FROM x AS x, y AS y; + +-- Comma JOIN in inner query +SELECT x.a, x.c FROM (SELECT x.a, z.c FROM x, y AS z) AS x; +SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z; + +-- (Regression) Column in ORDER BY +SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1; +SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1; diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index f7bbdda..f1d0f7d 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -2,11 +2,7 @@ SELECT a, m FROM z LATERAL VIEW EXPLODE([1, 2]) q AS m; SELECT "z"."a" AS "a", "q"."m" AS "m" -FROM ( - SELECT - "z"."a" AS "a" - FROM "z" AS "z" -) AS "z" +FROM "z" AS "z" LATERAL VIEW EXPLODE(ARRAY(1, 2)) q AS "m"; @@ -91,41 +87,26 @@ FROM ( WHERE (TRUE AND TRUE OR 'a' = 'b') AND a > 1 GROUP BY a; SELECT - "d"."a" AS "a", - SUM("d"."b") AS "_col_1" -FROM ( + "x"."a" AS "a", + SUM("y"."b") AS "_col_1" +FROM "x" AS "x" +LEFT JOIN ( SELECT - "x"."a" AS "a", - "y"."b" AS "b" - FROM ( - SELECT - "x"."a" AS "a" - FROM "x" AS "x" - WHERE - "x"."a" > 1 - ) AS "x" - LEFT JOIN ( - SELECT - MAX("y"."b") AS "_col_0", - "y"."a" AS "_u_1" - FROM "y" AS "y" - GROUP BY - "y"."a" - ) AS "_u_0" - ON "x"."a" = "_u_0"."_u_1" - JOIN ( - SELECT - "y"."a" AS "a", - "y"."b" AS "b" - FROM "y" AS "y" - ) AS "y" - ON "x"."a" = "y"."a" - WHERE - "_u_0"."_col_0" >= 0 - AND NOT "_u_0"."_u_1" IS NULL -) AS "d" + MAX("y"."b") AS "_col_0", + "y"."a" AS "_u_1" + FROM "y" AS "y" + GROUP BY + "y"."a" +) AS "_u_0" + ON "x"."a" = "_u_0"."_u_1" +JOIN "y" AS "y" + ON "x"."a" = "y"."a" +WHERE + "_u_0"."_col_0" >= 0 + AND "x"."a" > 1 + AND NOT "_u_0"."_u_1" IS NULL GROUP BY - "d"."a"; + "x"."a"; (SELECT a FROM x) LIMIT 1; ( diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index 482e231..0b6d382 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -120,36 +120,16 @@ SELECT "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" -FROM ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_mfgr" AS "p_mfgr", - "part"."p_type" AS "p_type", - "part"."p_size" AS "p_size" - FROM "part" AS "part" - WHERE - "part"."p_size" = 15 - AND "part"."p_type" LIKE '%BRASS' -) AS "part" +FROM "part" AS "part" LEFT JOIN ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", "partsupp"."ps_partkey" AS "_u_1" FROM "_e_0" AS "partsupp" CROSS JOIN "_e_1" AS "region" - JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_regionkey" AS "n_regionkey" - FROM "nation" AS 
"nation" - ) AS "nation" + JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" - JOIN ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" - ) AS "supplier" + JOIN "supplier" AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" GROUP BY @@ -157,31 +137,17 @@ LEFT JOIN ( ) AS "_u_0" ON "part"."p_partkey" = "_u_0"."_u_1" CROSS JOIN "_e_1" AS "region" -JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name", - "nation"."n_regionkey" AS "n_regionkey" - FROM "nation" AS "nation" -) AS "nation" +JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "_e_0" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" -JOIN ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_name" AS "s_name", - "supplier"."s_address" AS "s_address", - "supplier"."s_nationkey" AS "s_nationkey", - "supplier"."s_phone" AS "s_phone", - "supplier"."s_acctbal" AS "s_acctbal", - "supplier"."s_comment" AS "s_comment" - FROM "supplier" AS "supplier" -) AS "supplier" +JOIN "supplier" AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" WHERE - "partsupp"."ps_supplycost" = "_u_0"."_col_0" + "part"."p_size" = 15 + AND "part"."p_type" LIKE '%BRASS' + AND "partsupp"."ps_supplycost" = "_u_0"."_col_0" AND NOT "_u_0"."_u_1" IS NULL ORDER BY "s_acctbal" DESC, @@ -224,36 +190,15 @@ SELECT )) AS "revenue", CAST("orders"."o_orderdate" AS TEXT) AS "o_orderdate", "orders"."o_shippriority" AS "o_shippriority" -FROM ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_mktsegment" AS "c_mktsegment" - FROM "customer" AS "customer" - WHERE - "customer"."c_mktsegment" = 'BUILDING' -) AS "customer" -JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_orderdate" AS "o_orderdate", - "orders"."o_shippriority" AS "o_shippriority" - FROM "orders" AS "orders" - WHERE - "orders"."o_orderdate" < '1995-03-15' -) AS "orders" +FROM "customer" AS "customer" +JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount", - "lineitem"."l_shipdate" AS "l_shipdate" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_shipdate" > '1995-03-15' -) AS "lineitem" +JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" +WHERE + "customer"."c_mktsegment" = 'BUILDING' + AND "lineitem"."l_shipdate" > '1995-03-15' + AND "orders"."o_orderdate" < '1995-03-15' GROUP BY "lineitem"."l_orderkey", "orders"."o_orderdate", @@ -342,57 +287,22 @@ SELECT SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue" -FROM ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_nationkey" AS "c_nationkey" - FROM "customer" AS "customer" -) AS "customer" -JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_orderdate" AS "o_orderdate" - FROM "orders" AS "orders" - WHERE - "orders"."o_orderdate" < CAST('1995-01-01' AS DATE) - AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE) -) AS "orders" +FROM "customer" AS "customer" +JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -CROSS JOIN ( - SELECT - 
"region"."r_regionkey" AS "r_regionkey", - "region"."r_name" AS "r_name" - FROM "region" AS "region" - WHERE - "region"."r_name" = 'ASIA' -) AS "region" -JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name", - "nation"."n_regionkey" AS "n_regionkey" - FROM "nation" AS "nation" -) AS "nation" +CROSS JOIN "region" AS "region" +JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" -JOIN ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" -) AS "supplier" +JOIN "supplier" AS "supplier" ON "customer"."c_nationkey" = "supplier"."s_nationkey" AND "supplier"."s_nationkey" = "nation"."n_nationkey" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_suppkey" AS "l_suppkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount" - FROM "lineitem" AS "lineitem" -) AS "lineitem" +JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_suppkey" = "supplier"."s_suppkey" +WHERE + "orders"."o_orderdate" < CAST('1995-01-01' AS DATE) + AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE) + AND "region"."r_name" = 'ASIA' GROUP BY "nation"."n_name" ORDER BY @@ -471,67 +381,37 @@ WITH "_e_0" AS ( OR "nation"."n_name" = 'GERMANY' ) SELECT - "shipping"."supp_nation" AS "supp_nation", - "shipping"."cust_nation" AS "cust_nation", - "shipping"."l_year" AS "l_year", - SUM("shipping"."volume") AS "revenue" -FROM ( - SELECT - "n1"."n_name" AS "supp_nation", - "n2"."n_name" AS "cust_nation", - EXTRACT(year FROM "lineitem"."l_shipdate") AS "l_year", - "lineitem"."l_extendedprice" * ( + "n1"."n_name" AS "supp_nation", + "n2"."n_name" AS "cust_nation", + EXTRACT(year FROM "lineitem"."l_shipdate") AS "l_year", + SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" - ) AS "volume" - FROM ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" - ) AS "supplier" - JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_suppkey" AS "l_suppkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount", - "lineitem"."l_shipdate" AS "l_shipdate" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) - ) AS "lineitem" - ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" - JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey" - FROM "orders" AS "orders" - ) AS "orders" - ON "orders"."o_orderkey" = "lineitem"."l_orderkey" - JOIN ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_nationkey" AS "c_nationkey" - FROM "customer" AS "customer" - ) AS "customer" - ON "customer"."c_custkey" = "orders"."o_custkey" - JOIN "_e_0" AS "n1" - ON "supplier"."s_nationkey" = "n1"."n_nationkey" - JOIN "_e_0" AS "n2" - ON "customer"."c_nationkey" = "n2"."n_nationkey" - AND ( - "n1"."n_name" = 'FRANCE' - OR "n2"."n_name" = 'FRANCE' - ) - AND ( - "n1"."n_name" = 'GERMANY' - OR "n2"."n_name" = 'GERMANY' - ) -) AS "shipping" + )) AS "revenue" +FROM "supplier" AS "supplier" +JOIN "lineitem" AS "lineitem" + ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" +JOIN "orders" AS "orders" + ON "orders"."o_orderkey" = "lineitem"."l_orderkey" +JOIN "customer" AS "customer" + ON "customer"."c_custkey" = "orders"."o_custkey" +JOIN "_e_0" AS "n1" + ON 
"supplier"."s_nationkey" = "n1"."n_nationkey" +JOIN "_e_0" AS "n2" + ON "customer"."c_nationkey" = "n2"."n_nationkey" + AND ( + "n1"."n_name" = 'FRANCE' + OR "n2"."n_name" = 'FRANCE' + ) + AND ( + "n1"."n_name" = 'GERMANY' + OR "n2"."n_name" = 'GERMANY' + ) +WHERE + "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) GROUP BY - "shipping"."supp_nation", - "shipping"."cust_nation", - "shipping"."l_year" + "n1"."n_name", + "n2"."n_name", + EXTRACT(year FROM "lineitem"."l_shipdate") ORDER BY "supp_nation", "cust_nation", @@ -578,87 +458,37 @@ group by order by o_year; SELECT - "all_nations"."o_year" AS "o_year", + EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", SUM(CASE - WHEN "all_nations"."nation" = 'BRAZIL' - THEN "all_nations"."volume" + WHEN "nation_2"."n_name" = 'BRAZIL' + THEN "lineitem"."l_extendedprice" * ( + 1 - "lineitem"."l_discount" + ) ELSE 0 - END) / SUM("all_nations"."volume") AS "mkt_share" -FROM ( - SELECT - EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", - "lineitem"."l_extendedprice" * ( + END) / SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" - ) AS "volume", - "n2"."n_name" AS "nation" - FROM ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_type" AS "p_type" - FROM "part" AS "part" - WHERE - "part"."p_type" = 'ECONOMY ANODIZED STEEL' - ) AS "part" - CROSS JOIN ( - SELECT - "region"."r_regionkey" AS "r_regionkey", - "region"."r_name" AS "r_name" - FROM "region" AS "region" - WHERE - "region"."r_name" = 'AMERICA' - ) AS "region" - JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_regionkey" AS "n_regionkey" - FROM "nation" AS "nation" - ) AS "n1" - ON "n1"."n_regionkey" = "region"."r_regionkey" - JOIN ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_nationkey" AS "c_nationkey" - FROM "customer" AS "customer" - ) AS "customer" - ON "customer"."c_nationkey" = "n1"."n_nationkey" - JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_orderdate" AS "o_orderdate" - FROM "orders" AS "orders" - WHERE - "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) - ) AS "orders" - ON "orders"."o_custkey" = "customer"."c_custkey" - JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_partkey" AS "l_partkey", - "lineitem"."l_suppkey" AS "l_suppkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount" - FROM "lineitem" AS "lineitem" - ) AS "lineitem" - ON "lineitem"."l_orderkey" = "orders"."o_orderkey" - AND "part"."p_partkey" = "lineitem"."l_partkey" - JOIN ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" - ) AS "supplier" - ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" - JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name" - FROM "nation" AS "nation" - ) AS "n2" - ON "supplier"."s_nationkey" = "n2"."n_nationkey" -) AS "all_nations" + )) AS "mkt_share" +FROM "part" AS "part" +CROSS JOIN "region" AS "region" +JOIN "nation" AS "nation" + ON "nation"."n_regionkey" = "region"."r_regionkey" +JOIN "customer" AS "customer" + ON "customer"."c_nationkey" = "nation"."n_nationkey" +JOIN "orders" AS "orders" + ON "orders"."o_custkey" = "customer"."c_custkey" +JOIN "lineitem" AS "lineitem" + ON "lineitem"."l_orderkey" = "orders"."o_orderkey" + AND "part"."p_partkey" = "lineitem"."l_partkey" +JOIN "supplier" AS 
"supplier" + ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" +JOIN "nation" AS "nation_2" + ON "supplier"."s_nationkey" = "nation_2"."n_nationkey" +WHERE + "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE) + AND "part"."p_type" = 'ECONOMY ANODIZED STEEL' + AND "region"."r_name" = 'AMERICA' GROUP BY - "all_nations"."o_year" + EXTRACT(year FROM "orders"."o_orderdate") ORDER BY "o_year"; @@ -698,69 +528,28 @@ order by nation, o_year desc; SELECT - "profit"."nation" AS "nation", - "profit"."o_year" AS "o_year", - SUM("profit"."amount") AS "sum_profit" -FROM ( - SELECT - "nation"."n_name" AS "nation", - EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", - "lineitem"."l_extendedprice" * ( + "nation"."n_name" AS "nation", + EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", + SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" - ) - "partsupp"."ps_supplycost" * "lineitem"."l_quantity" AS "amount" - FROM ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_name" AS "p_name" - FROM "part" AS "part" - WHERE - "part"."p_name" LIKE '%green%' - ) AS "part" - JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_partkey" AS "l_partkey", - "lineitem"."l_suppkey" AS "l_suppkey", - "lineitem"."l_quantity" AS "l_quantity", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount" - FROM "lineitem" AS "lineitem" - ) AS "lineitem" - ON "part"."p_partkey" = "lineitem"."l_partkey" - JOIN ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" - ) AS "supplier" - ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" - JOIN ( - SELECT - "partsupp"."ps_partkey" AS "ps_partkey", - "partsupp"."ps_suppkey" AS "ps_suppkey", - "partsupp"."ps_supplycost" AS "ps_supplycost" - FROM "partsupp" AS "partsupp" - ) AS "partsupp" - ON "partsupp"."ps_partkey" = "lineitem"."l_partkey" - AND "partsupp"."ps_suppkey" = "lineitem"."l_suppkey" - JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_orderdate" AS "o_orderdate" - FROM "orders" AS "orders" - ) AS "orders" - ON "orders"."o_orderkey" = "lineitem"."l_orderkey" - JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name" - FROM "nation" AS "nation" - ) AS "nation" - ON "supplier"."s_nationkey" = "nation"."n_nationkey" -) AS "profit" + ) - "partsupp"."ps_supplycost" * "lineitem"."l_quantity") AS "sum_profit" +FROM "part" AS "part" +JOIN "lineitem" AS "lineitem" + ON "part"."p_partkey" = "lineitem"."l_partkey" +JOIN "supplier" AS "supplier" + ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" +JOIN "partsupp" AS "partsupp" + ON "partsupp"."ps_partkey" = "lineitem"."l_partkey" + AND "partsupp"."ps_suppkey" = "lineitem"."l_suppkey" +JOIN "orders" AS "orders" + ON "orders"."o_orderkey" = "lineitem"."l_orderkey" +JOIN "nation" AS "nation" + ON "supplier"."s_nationkey" = "nation"."n_nationkey" +WHERE + "part"."p_name" LIKE '%green%' GROUP BY - "profit"."nation", - "profit"."o_year" + "nation"."n_name", + EXTRACT(year FROM "orders"."o_orderdate") ORDER BY "nation", "o_year" DESC; @@ -812,46 +601,17 @@ SELECT "customer"."c_address" AS "c_address", "customer"."c_phone" AS "c_phone", "customer"."c_comment" AS "c_comment" -FROM ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_name" AS "c_name", - "customer"."c_address" AS "c_address", - "customer"."c_nationkey" AS "c_nationkey", - "customer"."c_phone" AS "c_phone", - 
"customer"."c_acctbal" AS "c_acctbal", - "customer"."c_comment" AS "c_comment" - FROM "customer" AS "customer" -) AS "customer" -JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_orderdate" AS "o_orderdate" - FROM "orders" AS "orders" - WHERE - "orders"."o_orderdate" < CAST('1994-01-01' AS DATE) - AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE) -) AS "orders" +FROM "customer" AS "customer" +JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount", - "lineitem"."l_returnflag" AS "l_returnflag" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_returnflag" = 'R' -) AS "lineitem" +JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" -JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name" - FROM "nation" AS "nation" -) AS "nation" +JOIN "nation" AS "nation" ON "customer"."c_nationkey" = "nation"."n_nationkey" +WHERE + "lineitem"."l_returnflag" = 'R' + AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE) + AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE) GROUP BY "customer"."c_custkey", "customer"."c_name", @@ -910,14 +670,7 @@ WITH "_e_0" AS ( SELECT "partsupp"."ps_partkey" AS "ps_partkey", SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") AS "value" -FROM ( - SELECT - "partsupp"."ps_partkey" AS "ps_partkey", - "partsupp"."ps_suppkey" AS "ps_suppkey", - "partsupp"."ps_availqty" AS "ps_availqty", - "partsupp"."ps_supplycost" AS "ps_supplycost" - FROM "partsupp" AS "partsupp" -) AS "partsupp" +FROM "partsupp" AS "partsupp" JOIN "_e_0" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "_e_1" AS "nation" @@ -928,13 +681,7 @@ HAVING SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") > ( SELECT SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") * 0.0001 AS "_col_0" - FROM ( - SELECT - "partsupp"."ps_suppkey" AS "ps_suppkey", - "partsupp"."ps_availqty" AS "ps_availqty", - "partsupp"."ps_supplycost" AS "ps_supplycost" - FROM "partsupp" AS "partsupp" - ) AS "partsupp" + FROM "partsupp" AS "partsupp" JOIN "_e_0" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "_e_1" AS "nation" @@ -988,28 +735,15 @@ SELECT THEN 1 ELSE 0 END) AS "low_line_count" -FROM ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_orderpriority" AS "o_orderpriority" - FROM "orders" AS "orders" -) AS "orders" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_shipdate" AS "l_shipdate", - "lineitem"."l_commitdate" AS "l_commitdate", - "lineitem"."l_receiptdate" AS "l_receiptdate", - "lineitem"."l_shipmode" AS "l_shipmode" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" - AND "lineitem"."l_receiptdate" < CAST('1995-01-01' AS DATE) - AND "lineitem"."l_receiptdate" >= CAST('1994-01-01' AS DATE) - AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate" - AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP') -) AS "lineitem" +FROM "orders" AS "orders" +JOIN "lineitem" AS "lineitem" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" +WHERE + "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" + AND "lineitem"."l_receiptdate" < CAST('1995-01-01' AS DATE) + AND "lineitem"."l_receiptdate" >= CAST('1994-01-01' AS DATE) + AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate" + 
AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP') GROUP BY "lineitem"."l_shipmode" ORDER BY @@ -1044,21 +778,10 @@ SELECT FROM ( SELECT COUNT("orders"."o_orderkey") AS "c_count" - FROM ( - SELECT - "customer"."c_custkey" AS "c_custkey" - FROM "customer" AS "customer" - ) AS "customer" - LEFT JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_comment" AS "o_comment" - FROM "orders" AS "orders" - WHERE - NOT "orders"."o_comment" LIKE '%special%requests%' - ) AS "orders" + FROM "customer" AS "customer" + LEFT JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" + AND NOT "orders"."o_comment" LIKE '%special%requests%' GROUP BY "customer"."c_custkey" ) AS "c_orders" @@ -1094,24 +817,12 @@ SELECT END) / SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "promo_revenue" -FROM ( - SELECT - "lineitem"."l_partkey" AS "l_partkey", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount", - "lineitem"."l_shipdate" AS "l_shipdate" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_shipdate" < CAST('1995-10-01' AS DATE) - AND "lineitem"."l_shipdate" >= CAST('1995-09-01' AS DATE) -) AS "lineitem" -JOIN ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_type" AS "p_type" - FROM "part" AS "part" -) AS "part" - ON "lineitem"."l_partkey" = "part"."p_partkey"; +FROM "lineitem" AS "lineitem" +JOIN "part" AS "part" + ON "lineitem"."l_partkey" = "part"."p_partkey" +WHERE + "lineitem"."l_shipdate" < CAST('1995-10-01' AS DATE) + AND "lineitem"."l_shipdate" >= CAST('1995-09-01' AS DATE); -------------------------------------- -- TPC-H 15 @@ -1165,14 +876,7 @@ SELECT "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "revenue"."total_revenue" AS "total_revenue" -FROM ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_name" AS "s_name", - "supplier"."s_address" AS "s_address", - "supplier"."s_phone" AS "s_phone" - FROM "supplier" AS "supplier" -) AS "supplier" +FROM "supplier" AS "supplier" JOIN "revenue" ON "revenue"."total_revenue" = ( SELECT @@ -1221,12 +925,7 @@ SELECT "part"."p_type" AS "p_type", "part"."p_size" AS "p_size", COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt" -FROM ( - SELECT - "partsupp"."ps_partkey" AS "ps_partkey", - "partsupp"."ps_suppkey" AS "ps_suppkey" - FROM "partsupp" AS "partsupp" -) AS "partsupp" +FROM "partsupp" AS "partsupp" LEFT JOIN ( SELECT "supplier"."s_suppkey" AS "s_suppkey" @@ -1237,21 +936,13 @@ LEFT JOIN ( "supplier"."s_suppkey" ) AS "_u_0" ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey" -JOIN ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_brand" AS "p_brand", - "part"."p_type" AS "p_type", - "part"."p_size" AS "p_size" - FROM "part" AS "part" - WHERE - "part"."p_brand" <> 'Brand#45' - AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) - AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%' -) AS "part" +JOIN "part" AS "part" ON "part"."p_partkey" = "partsupp"."ps_partkey" WHERE "_u_0"."s_suppkey" IS NULL + AND "part"."p_brand" <> 'Brand#45' + AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%' GROUP BY "part"."p_brand", "part"."p_type", @@ -1284,23 +975,8 @@ where ); SELECT SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly" -FROM ( - SELECT - "lineitem"."l_partkey" AS "l_partkey", - "lineitem"."l_quantity" AS "l_quantity", - "lineitem"."l_extendedprice" AS "l_extendedprice" - FROM "lineitem" AS "lineitem" -) AS 
"lineitem" -JOIN ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_brand" AS "p_brand", - "part"."p_container" AS "p_container" - FROM "part" AS "part" - WHERE - "part"."p_brand" = 'Brand#23' - AND "part"."p_container" = 'MED BOX' -) AS "part" +FROM "lineitem" AS "lineitem" +JOIN "part" AS "part" ON "part"."p_partkey" = "lineitem"."l_partkey" LEFT JOIN ( SELECT @@ -1313,6 +989,8 @@ LEFT JOIN ( ON "_u_0"."_u_1" = "part"."p_partkey" WHERE "lineitem"."l_quantity" < "_u_0"."_col_0" + AND "part"."p_brand" = 'Brand#23' + AND "part"."p_container" = 'MED BOX' AND NOT "_u_0"."_u_1" IS NULL; -------------------------------------- @@ -1359,20 +1037,8 @@ SELECT "orders"."o_orderdate" AS "o_orderdate", "orders"."o_totalprice" AS "o_totalprice", SUM("lineitem"."l_quantity") AS "_col_5" -FROM ( - SELECT - "customer"."c_custkey" AS "c_custkey", - "customer"."c_name" AS "c_name" - FROM "customer" AS "customer" -) AS "customer" -JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_custkey" AS "o_custkey", - "orders"."o_totalprice" AS "o_totalprice", - "orders"."o_orderdate" AS "o_orderdate" - FROM "orders" AS "orders" -) AS "orders" +FROM "customer" AS "customer" +JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" LEFT JOIN ( SELECT @@ -1385,12 +1051,7 @@ LEFT JOIN ( SUM("lineitem"."l_quantity") > 300 ) AS "_u_0" ON "orders"."o_orderkey" = "_u_0"."l_orderkey" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_quantity" AS "l_quantity" - FROM "lineitem" AS "lineitem" -) AS "lineitem" +JOIN "lineitem" AS "lineitem" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" WHERE NOT "_u_0"."l_orderkey" IS NULL @@ -1447,24 +1108,8 @@ SELECT SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue" -FROM ( - SELECT - "lineitem"."l_partkey" AS "l_partkey", - "lineitem"."l_quantity" AS "l_quantity", - "lineitem"."l_extendedprice" AS "l_extendedprice", - "lineitem"."l_discount" AS "l_discount", - "lineitem"."l_shipinstruct" AS "l_shipinstruct", - "lineitem"."l_shipmode" AS "l_shipmode" - FROM "lineitem" AS "lineitem" -) AS "lineitem" -JOIN ( - SELECT - "part"."p_partkey" AS "p_partkey", - "part"."p_brand" AS "p_brand", - "part"."p_size" AS "p_size", - "part"."p_container" AS "p_container" - FROM "part" AS "part" -) AS "part" +FROM "lineitem" AS "lineitem" +JOIN "part" AS "part" ON ( "part"."p_brand" = 'Brand#12' AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') @@ -1558,14 +1203,7 @@ order by SELECT "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address" -FROM ( - SELECT - "supplier"."s_suppkey" AS "s_suppkey", - "supplier"."s_name" AS "s_name", - "supplier"."s_address" AS "s_address", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" -) AS "supplier" +FROM "supplier" AS "supplier" LEFT JOIN ( SELECT "partsupp"."ps_suppkey" AS "ps_suppkey" @@ -1604,17 +1242,11 @@ LEFT JOIN ( "partsupp"."ps_suppkey" ) AS "_u_4" ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey" -JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name" - FROM "nation" AS "nation" - WHERE - "nation"."n_name" = 'CANADA' -) AS "nation" +JOIN "nation" AS "nation" ON "supplier"."s_nationkey" = "nation"."n_nationkey" WHERE - NOT "_u_4"."ps_suppkey" IS NULL + "nation"."n_name" = 'CANADA' + AND NOT "_u_4"."ps_suppkey" IS NULL ORDER BY "s_name"; @@ -1665,24 +1297,9 @@ limit SELECT "supplier"."s_name" AS "s_name", COUNT(*) AS "numwait" -FROM ( - SELECT - "supplier"."s_suppkey" 
AS "s_suppkey", - "supplier"."s_name" AS "s_name", - "supplier"."s_nationkey" AS "s_nationkey" - FROM "supplier" AS "supplier" -) AS "supplier" -JOIN ( - SELECT - "lineitem"."l_orderkey" AS "l_orderkey", - "lineitem"."l_suppkey" AS "l_suppkey", - "lineitem"."l_commitdate" AS "l_commitdate", - "lineitem"."l_receiptdate" AS "l_receiptdate" - FROM "lineitem" AS "lineitem" - WHERE - "lineitem"."l_receiptdate" > "lineitem"."l_commitdate" -) AS "l1" - ON "supplier"."s_suppkey" = "l1"."l_suppkey" +FROM "supplier" AS "supplier" +JOIN "lineitem" AS "lineitem" + ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" LEFT JOIN ( SELECT "l2"."l_orderkey" AS "l_orderkey", @@ -1691,7 +1308,7 @@ LEFT JOIN ( GROUP BY "l2"."l_orderkey" ) AS "_u_0" - ON "_u_0"."l_orderkey" = "l1"."l_orderkey" + ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey" LEFT JOIN ( SELECT "l3"."l_orderkey" AS "l_orderkey", @@ -1702,31 +1319,20 @@ LEFT JOIN ( GROUP BY "l3"."l_orderkey" ) AS "_u_2" - ON "_u_2"."l_orderkey" = "l1"."l_orderkey" -JOIN ( - SELECT - "orders"."o_orderkey" AS "o_orderkey", - "orders"."o_orderstatus" AS "o_orderstatus" - FROM "orders" AS "orders" - WHERE - "orders"."o_orderstatus" = 'F' -) AS "orders" - ON "orders"."o_orderkey" = "l1"."l_orderkey" -JOIN ( - SELECT - "nation"."n_nationkey" AS "n_nationkey", - "nation"."n_name" AS "n_name" - FROM "nation" AS "nation" - WHERE - "nation"."n_name" = 'SAUDI ARABIA' -) AS "nation" + ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey" +JOIN "orders" AS "orders" + ON "orders"."o_orderkey" = "lineitem"."l_orderkey" +JOIN "nation" AS "nation" ON "supplier"."s_nationkey" = "nation"."n_nationkey" WHERE ( "_u_2"."l_orderkey" IS NULL - OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "l1"."l_suppkey") + OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "lineitem"."l_suppkey") ) - AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "l1"."l_suppkey") + AND "lineitem"."l_receiptdate" > "lineitem"."l_commitdate" + AND "nation"."n_name" = 'SAUDI ARABIA' + AND "orders"."o_orderstatus" = 'F' + AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "lineitem"."l_suppkey") AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "supplier"."s_name" @@ -1776,35 +1382,30 @@ group by order by cntrycode; SELECT - "custsale"."cntrycode" AS "cntrycode", + SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode", COUNT(*) AS "numcust", - SUM("custsale"."c_acctbal") AS "totacctbal" -FROM ( + SUM("customer"."c_acctbal") AS "totacctbal" +FROM "customer" AS "customer" +LEFT JOIN ( SELECT - SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode", - "customer"."c_acctbal" AS "c_acctbal" - FROM "customer" AS "customer" - LEFT JOIN ( + "orders"."o_custkey" AS "_u_1" + FROM "orders" AS "orders" + GROUP BY + "orders"."o_custkey" +) AS "_u_0" + ON "_u_0"."_u_1" = "customer"."c_custkey" +WHERE + "_u_0"."_u_1" IS NULL + AND "customer"."c_acctbal" > ( SELECT - "orders"."o_custkey" AS "_u_1" - FROM "orders" AS "orders" - GROUP BY - "orders"."o_custkey" - ) AS "_u_0" - ON "_u_0"."_u_1" = "customer"."c_custkey" - WHERE - "_u_0"."_u_1" IS NULL - AND "customer"."c_acctbal" > ( - SELECT - AVG("customer"."c_acctbal") AS "_col_0" - FROM "customer" AS "customer" - WHERE - "customer"."c_acctbal" > 0.00 - AND SUBSTRING("customer"."c_phone", 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - ) - AND SUBSTRING("customer"."c_phone", 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') -) AS "custsale" + AVG("customer"."c_acctbal") AS "_col_0" + FROM "customer" AS "customer" + WHERE + "customer"."c_acctbal" > 0.00 + AND SUBSTRING("customer"."c_phone", 1, 2) IN 
('13', '31', '23', '29', '30', '18', '17') + ) + AND SUBSTRING("customer"."c_phone", 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') GROUP BY - "custsale"."cntrycode" + SUBSTRING("customer"."c_phone", 1, 2) ORDER BY "cntrycode"; diff --git a/tests/helpers.py b/tests/helpers.py index d4edb14..ad50483 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -5,9 +5,7 @@ FIXTURES_DIR = os.path.join(FILE_DIR, "fixtures") def _filter_comments(s): - return "\n".join( - [line for line in s.splitlines() if line and not line.startswith("--")] - ) + return "\n".join([line for line in s.splitlines() if line and not line.startswith("--")]) def _extract_meta(sql): @@ -23,9 +21,7 @@ def _extract_meta(sql): def assert_logger_contains(message, logger, level="error"): - output = "\n".join( - str(args[0][0]) for args in getattr(logger, level).call_args_list - ) + output = "\n".join(str(args[0][0]) for args in getattr(logger, level).call_args_list) assert message in output diff --git a/tests/test_build.py b/tests/test_build.py index a4cffde..18c0e47 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -46,10 +46,7 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl WHERE FALSE", ), ( - lambda: select("x") - .from_("tbl") - .where("x > 0") - .where("x < 9", append=False), + lambda: select("x").from_("tbl").where("x > 0").where("x < 9", append=False), "SELECT x FROM tbl WHERE x < 9", ), ( @@ -61,10 +58,7 @@ class TestBuild(unittest.TestCase): "SELECT x, y FROM tbl GROUP BY x, y", ), ( - lambda: select("x", "y", "z", "a") - .from_("tbl") - .group_by("x, y", "z") - .group_by("a"), + lambda: select("x", "y", "z", "a").from_("tbl").group_by("x, y", "z").group_by("a"), "SELECT x, y, z, a FROM tbl GROUP BY x, y, z, a", ), ( @@ -85,9 +79,7 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl JOIN tbl2 ON tbl.y = tbl2.y", ), ( - lambda: select("x") - .from_("tbl") - .join("tbl2", on=["tbl.y = tbl2.y", "a = b"]), + lambda: select("x").from_("tbl").join("tbl2", on=["tbl.y = tbl2.y", "a = b"]), "SELECT x FROM tbl JOIN tbl2 ON tbl.y = tbl2.y AND a = b", ), ( @@ -95,21 +87,15 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl LEFT OUTER JOIN tbl2", ), ( - lambda: select("x") - .from_("tbl") - .join(exp.Table(this="tbl2"), join_type="left outer"), + lambda: select("x").from_("tbl").join(exp.Table(this="tbl2"), join_type="left outer"), "SELECT x FROM tbl LEFT OUTER JOIN tbl2", ), ( - lambda: select("x") - .from_("tbl") - .join(exp.Table(this="tbl2"), join_type="left outer", join_alias="foo"), + lambda: select("x").from_("tbl").join(exp.Table(this="tbl2"), join_type="left outer", join_alias="foo"), "SELECT x FROM tbl LEFT OUTER JOIN tbl2 AS foo", ), ( - lambda: select("x") - .from_("tbl") - .join(select("y").from_("tbl2"), join_type="left outer"), + lambda: select("x").from_("tbl").join(select("y").from_("tbl2"), join_type="left outer"), "SELECT x FROM tbl LEFT OUTER JOIN (SELECT y FROM tbl2)", ), ( @@ -132,9 +118,7 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl LEFT OUTER JOIN (SELECT y FROM tbl2) AS aliased", ), ( - lambda: select("x") - .from_("tbl") - .join(parse_one("left join x", into=exp.Join), on="a=b"), + lambda: select("x").from_("tbl").join(parse_one("left join x", into=exp.Join), on="a=b"), "SELECT x FROM tbl LEFT JOIN x ON a = b", ), ( @@ -142,9 +126,7 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl LEFT JOIN x ON a = b", ), ( - lambda: select("x") - .from_("tbl") - .join("select b from tbl2", on="a=b", join_type="left"), + lambda: 
select("x").from_("tbl").join("select b from tbl2", on="a=b", join_type="left"), "SELECT x FROM tbl LEFT JOIN (SELECT b FROM tbl2) ON a = b", ), ( @@ -159,10 +141,7 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl LEFT JOIN (SELECT b FROM tbl2) AS aliased ON a = b", ), ( - lambda: select("x", "COUNT(y)") - .from_("tbl") - .group_by("x") - .having("COUNT(y) > 0"), + lambda: select("x", "COUNT(y)").from_("tbl").group_by("x").having("COUNT(y) > 0"), "SELECT x, COUNT(y) FROM tbl GROUP BY x HAVING COUNT(y) > 0", ), ( @@ -190,24 +169,15 @@ class TestBuild(unittest.TestCase): "SELECT x FROM tbl SORT BY x, y DESC", ), ( - lambda: select("x", "y", "z", "a") - .from_("tbl") - .order_by("x, y", "z") - .order_by("a"), + lambda: select("x", "y", "z", "a").from_("tbl").order_by("x, y", "z").order_by("a"), "SELECT x, y, z, a FROM tbl ORDER BY x, y, z, a", ), ( - lambda: select("x", "y", "z", "a") - .from_("tbl") - .cluster_by("x, y", "z") - .cluster_by("a"), + lambda: select("x", "y", "z", "a").from_("tbl").cluster_by("x, y", "z").cluster_by("a"), "SELECT x, y, z, a FROM tbl CLUSTER BY x, y, z, a", ), ( - lambda: select("x", "y", "z", "a") - .from_("tbl") - .sort_by("x, y", "z") - .sort_by("a"), + lambda: select("x", "y", "z", "a").from_("tbl").sort_by("x, y", "z").sort_by("a"), "SELECT x, y, z, a FROM tbl SORT BY x, y, z, a", ), (lambda: select("x").from_("tbl").limit(10), "SELECT x FROM tbl LIMIT 10"), @@ -220,21 +190,15 @@ class TestBuild(unittest.TestCase): "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( - lambda: select("x") - .from_("tbl") - .with_("tbl", as_="SELECT x FROM tbl2", recursive=True), + lambda: select("x").from_("tbl").with_("tbl", as_="SELECT x FROM tbl2", recursive=True), "WITH RECURSIVE tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( - lambda: select("x") - .from_("tbl") - .with_("tbl", as_=select("x").from_("tbl2")), + lambda: select("x").from_("tbl").with_("tbl", as_=select("x").from_("tbl2")), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( - lambda: select("x") - .from_("tbl") - .with_("tbl (x, y)", as_=select("x", "y").from_("tbl2")), + lambda: select("x").from_("tbl").with_("tbl (x, y)", as_=select("x", "y").from_("tbl2")), "WITH tbl(x, y) AS (SELECT x, y FROM tbl2) SELECT x FROM tbl", ), ( @@ -245,72 +209,43 @@ class TestBuild(unittest.TestCase): "WITH tbl AS (SELECT x FROM tbl2), tbl2 AS (SELECT x FROM tbl3) SELECT x FROM tbl", ), ( - lambda: select("x") - .from_("tbl") - .with_("tbl", as_=select("x", "y").from_("tbl2")) - .select("y"), + lambda: select("x").from_("tbl").with_("tbl", as_=select("x", "y").from_("tbl2")).select("y"), "WITH tbl AS (SELECT x, y FROM tbl2) SELECT x, y FROM tbl", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .group_by("x"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").group_by("x"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl GROUP BY x", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .order_by("x"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").order_by("x"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl ORDER BY x", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .limit(10), + 
lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").limit(10), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl LIMIT 10", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .offset(10), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").offset(10), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl OFFSET 10", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .join("tbl3"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").join("tbl3"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl JOIN tbl3", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .distinct(), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").distinct(), "WITH tbl AS (SELECT x FROM tbl2) SELECT DISTINCT x FROM tbl", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .where("x > 10"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").where("x > 10"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl WHERE x > 10", ), ( - lambda: select("x") - .with_("tbl", as_=select("x").from_("tbl2")) - .from_("tbl") - .having("x > 20"), + lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").having("x > 20"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl HAVING x > 20", ), (lambda: select("x").from_("tbl").subquery(), "(SELECT x FROM tbl)"), @@ -324,9 +259,7 @@ class TestBuild(unittest.TestCase): ), (lambda: from_("tbl").select("x"), "SELECT x FROM tbl"), ( - lambda: parse_one("SELECT a FROM tbl") - .assert_is(exp.Select) - .select("b"), + lambda: parse_one("SELECT a FROM tbl").assert_is(exp.Select).select("b"), "SELECT a, b FROM tbl", ), ( @@ -368,15 +301,11 @@ class TestBuild(unittest.TestCase): "SELECT * FROM x WHERE y = 1 AND z = 1", ), ( - lambda: exp.subquery("select x from tbl", "foo") - .select("x") - .where("x > 0"), + lambda: exp.subquery("select x from tbl", "foo").select("x").where("x > 0"), "SELECT x FROM (SELECT x FROM tbl) AS foo WHERE x > 0", ), ( - lambda: exp.subquery( - "select x from tbl UNION select x from bar", "unioned" - ).select("x"), + lambda: exp.subquery("select x from tbl UNION select x from bar", "unioned").select("x"), "SELECT x FROM (SELECT x FROM tbl UNION SELECT x FROM bar) AS unioned", ), ]: diff --git a/tests/test_executor.py b/tests/test_executor.py index 9afa225..c5841d3 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -27,10 +27,7 @@ class TestExecutor(unittest.TestCase): ) cls.cache = {} - cls.sqls = [ - (sql, expected) - for _, sql, expected in load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql") - ] + cls.sqls = [(sql, expected) for _, sql, expected in load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql")] @classmethod def tearDownClass(cls): @@ -50,18 +47,17 @@ class TestExecutor(unittest.TestCase): self.assertEqual(Python().generate(parse_one("'x '''")), r"'x \''") def test_optimized_tpch(self): - for sql, optimized in self.sqls[0:20]: - a = self.cached_execute(sql) - b = self.conn.execute(optimized).fetchdf() - self.rename_anonymous(b, a) - assert_frame_equal(a, b) + for i, (sql, optimized) in enumerate(self.sqls[:20], start=1): + with self.subTest(f"{i}, {sql}"): + a = self.cached_execute(sql) + b = self.conn.execute(optimized).fetchdf() + self.rename_anonymous(b, a) + assert_frame_equal(a, b) def test_execute_tpch(self): def 
to_csv(expression): if isinstance(expression, exp.Table): - return parse_one( - f"READ_CSV('{DIR}{expression.name}.csv.gz', 'delimiter', '|') AS {expression.name}" - ) + return parse_one(f"READ_CSV('{DIR}{expression.name}.csv.gz', 'delimiter', '|') AS {expression.name}") return expression for sql, _ in self.sqls[0:3]: diff --git a/tests/test_expressions.py b/tests/test_expressions.py index eaef022..716e457 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -26,9 +26,7 @@ class TestExpressions(unittest.TestCase): parse_one("ROW() OVER(Partition by y)"), parse_one("ROW() OVER (partition BY y)"), ) - self.assertEqual( - parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)") - ) + self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)")) def test_find(self): expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y") @@ -87,9 +85,7 @@ class TestExpressions(unittest.TestCase): self.assertIsNone(column.find_ancestor(exp.Join)) def test_alias_or_name(self): - expression = parse_one( - "SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz" - ) + expression = parse_one("SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz") self.assertEqual( [e.alias_or_name for e in expression.expressions], ["a", "B", "e", "*", "zz", "z"], @@ -118,9 +114,7 @@ class TestExpressions(unittest.TestCase): ) def test_named_selects(self): - expression = parse_one( - "SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz" - ) + expression = parse_one("SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz") self.assertEqual(expression.named_selects, ["a", "B", "e", "*", "zz", "z"]) expression = parse_one( @@ -196,15 +190,9 @@ class TestExpressions(unittest.TestCase): def test_sql(self): self.assertEqual(parse_one("x + y * 2").sql(), "x + y * 2") - self.assertEqual( - parse_one('select "x"').sql(dialect="hive", pretty=True), "SELECT\n `x`" - ) - self.assertEqual( - parse_one("X + y").sql(identify=True, normalize=True), '"x" + "y"' - ) - self.assertEqual( - parse_one("SUM(X)").sql(identify=True, normalize=True), 'SUM("x")' - ) + self.assertEqual(parse_one('select "x"').sql(dialect="hive", pretty=True), "SELECT\n `x`") + self.assertEqual(parse_one("X + y").sql(identify=True, normalize=True), '"x" + "y"') + self.assertEqual(parse_one("SUM(X)").sql(identify=True, normalize=True), 'SUM("x")') def test_transform_with_arguments(self): expression = parse_one("a") @@ -229,15 +217,11 @@ class TestExpressions(unittest.TestCase): return node actual_expression_1 = expression.transform(fun) - self.assertEqual( - actual_expression_1.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)" - ) + self.assertEqual(actual_expression_1.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)") self.assertIsNot(actual_expression_1, expression) actual_expression_2 = expression.transform(fun, copy=False) - self.assertEqual( - actual_expression_2.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)" - ) + self.assertEqual(actual_expression_2.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)") self.assertIs(actual_expression_2, expression) with self.assertRaises(ValueError): @@ -274,12 +258,8 @@ class TestExpressions(unittest.TestCase): expression = parse_one("SELECT * FROM (SELECT * FROM x)") self.assertEqual(len(list(expression.walk())), 9) self.assertEqual(len(list(expression.walk(bfs=False))), 9) - self.assertTrue( - all(isinstance(e, exp.Expression) for e, _, _ in expression.walk()) - ) - self.assertTrue( - 
all(isinstance(e, exp.Expression) for e, _, _ in expression.walk(bfs=False)) - ) + self.assertTrue(all(isinstance(e, exp.Expression) for e, _, _ in expression.walk())) + self.assertTrue(all(isinstance(e, exp.Expression) for e, _, _ in expression.walk(bfs=False))) def test_functions(self): self.assertIsInstance(parse_one("ABS(a)"), exp.Abs) @@ -303,9 +283,7 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("IF(a, b, c)"), exp.If) self.assertIsInstance(parse_one("INITCAP(a)"), exp.Initcap) self.assertIsInstance(parse_one("JSON_EXTRACT(a, '$.name')"), exp.JSONExtract) - self.assertIsInstance( - parse_one("JSON_EXTRACT_SCALAR(a, '$.name')"), exp.JSONExtractScalar - ) + self.assertIsInstance(parse_one("JSON_EXTRACT_SCALAR(a, '$.name')"), exp.JSONExtractScalar) self.assertIsInstance(parse_one("LEAST(a, b)"), exp.Least) self.assertIsInstance(parse_one("LN(a)"), exp.Ln) self.assertIsInstance(parse_one("LOG10(a)"), exp.Log10) @@ -334,6 +312,7 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("TIME_STR_TO_DATE(a)"), exp.TimeStrToDate) self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime) self.assertIsInstance(parse_one("TIME_STR_TO_UNIX(a)"), exp.TimeStrToUnix) + self.assertIsInstance(parse_one("TRIM(LEADING 'b' FROM 'bla')"), exp.Trim) self.assertIsInstance(parse_one("TS_OR_DS_ADD(a, 1, 'day')"), exp.TsOrDsAdd) self.assertIsInstance(parse_one("TS_OR_DS_TO_DATE(a)"), exp.TsOrDsToDate) self.assertIsInstance(parse_one("TS_OR_DS_TO_DATE_STR(a)"), exp.Substring) @@ -404,12 +383,8 @@ class TestExpressions(unittest.TestCase): self.assertFalse(exp.to_identifier("x").quoted) def test_function_normalizer(self): - self.assertEqual( - parse_one("HELLO()").sql(normalize_functions="lower"), "hello()" - ) - self.assertEqual( - parse_one("hello()").sql(normalize_functions="upper"), "HELLO()" - ) + self.assertEqual(parse_one("HELLO()").sql(normalize_functions="lower"), "hello()") + self.assertEqual(parse_one("hello()").sql(normalize_functions="upper"), "HELLO()") self.assertEqual(parse_one("heLLO()").sql(normalize_functions=None), "heLLO()") self.assertEqual(parse_one("SUM(x)").sql(normalize_functions="lower"), "sum(x)") self.assertEqual(parse_one("sum(x)").sql(normalize_functions="upper"), "SUM(x)") diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 40540b3..102e141 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -31,9 +31,7 @@ class TestOptimizer(unittest.TestCase): dialect = meta.get("dialect") with self.subTest(sql): self.assertEqual( - func(parse_one(sql, read=dialect), **kwargs).sql( - pretty=pretty, dialect=dialect - ), + func(parse_one(sql, read=dialect), **kwargs).sql(pretty=pretty, dialect=dialect), expected, ) @@ -86,9 +84,7 @@ class TestOptimizer(unittest.TestCase): for sql in load_sql_fixtures("optimizer/qualify_columns__invalid.sql"): with self.subTest(sql): with self.assertRaises(OptimizeError): - optimizer.qualify_columns.qualify_columns( - parse_one(sql), schema=self.schema - ) + optimizer.qualify_columns.qualify_columns(parse_one(sql), schema=self.schema) def test_quote_identities(self): self.check_file("quote_identities", optimizer.quote_identities.quote_identities) @@ -100,9 +96,7 @@ class TestOptimizer(unittest.TestCase): expression = optimizer.pushdown_projections.pushdown_projections(expression) return expression - self.check_file( - "pushdown_projections", pushdown_projections, schema=self.schema - ) + self.check_file("pushdown_projections", pushdown_projections, 
schema=self.schema) def test_simplify(self): self.check_file("simplify", optimizer.simplify.simplify) @@ -115,9 +109,7 @@ class TestOptimizer(unittest.TestCase): ) def test_pushdown_predicates(self): - self.check_file( - "pushdown_predicates", optimizer.pushdown_predicates.pushdown_predicates - ) + self.check_file("pushdown_predicates", optimizer.pushdown_predicates.pushdown_predicates) def test_expand_multi_table_selects(self): self.check_file( @@ -138,10 +130,17 @@ class TestOptimizer(unittest.TestCase): pretty=True, ) + def test_merge_derived_tables(self): + def optimize(expression, **kwargs): + expression = optimizer.qualify_tables.qualify_tables(expression) + expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs) + expression = optimizer.merge_derived_tables.merge_derived_tables(expression) + return expression + + self.check_file("merge_derived_tables", optimize, schema=self.schema) + def test_tpch(self): - self.check_file( - "tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True - ) + self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True) def test_schema(self): schema = ensure_schema( @@ -262,9 +261,7 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual(len(scopes), 5) self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x") self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y") - self.assertEqual( - scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b" - ) + self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b") self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y") self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql()) diff --git a/tests/test_parser.py b/tests/test_parser.py index 779083d..1054103 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -16,28 +16,23 @@ class TestParser(unittest.TestCase): self.assertIsInstance(parse_one("array", into=exp.DataType), exp.DataType) def test_column(self): - columns = parse_one("select a, ARRAY[1] b, case when 1 then 1 end").find_all( - exp.Column - ) + columns = parse_one("select a, ARRAY[1] b, case when 1 then 1 end").find_all(exp.Column) assert len(list(columns)) == 1 self.assertIsNotNone(parse_one("date").find(exp.Column)) def test_table(self): - tables = [ - t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table) - ] + tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)] self.assertEqual(tables, ["a", "b.c", "d"]) def test_select(self): - self.assertIsNotNone( - parse_one("select * from (select 1) x order by x.y").args["order"] - ) - self.assertIsNotNone( - parse_one("select * from x where a = (select 1) order by x.y").args["order"] - ) + self.assertIsNotNone(parse_one("select 1 natural")) + self.assertIsNotNone(parse_one("select * from (select 1) x order by x.y").args["order"]) + self.assertIsNotNone(parse_one("select * from x where a = (select 1) order by x.y").args["order"]) + self.assertEqual(len(parse_one("select * from (select 1) x cross join y").args["joins"]), 1) self.assertEqual( - len(parse_one("select * from (select 1) x cross join y").args["joins"]), 1 + parse_one("""SELECT * FROM x CROSS JOIN y, z LATERAL VIEW EXPLODE(y)""").sql(), + """SELECT * FROM x, z LATERAL VIEW EXPLODE(y) CROSS JOIN y""", ) def test_command(self): @@ -72,12 +67,8 @@ class TestParser(unittest.TestCase): ) assert len(expressions) == 2 - assert ( - 
expressions[0].args["from"].expressions[0].args["this"].args["this"] == "a" - ) - assert ( - expressions[1].args["from"].expressions[0].args["this"].args["this"] == "b" - ) + assert expressions[0].args["from"].expressions[0].args["this"].args["this"] == "a" + assert expressions[1].args["from"].expressions[0].args["this"].args["this"] == "b" def test_expression(self): ignore = Parser(error_level=ErrorLevel.IGNORE) @@ -147,13 +138,9 @@ class TestParser(unittest.TestCase): def test_pretty_config_override(self): self.assertEqual(parse_one("SELECT col FROM x").sql(), "SELECT col FROM x") with patch("sqlglot.pretty", True): - self.assertEqual( - parse_one("SELECT col FROM x").sql(), "SELECT\n col\nFROM x" - ) + self.assertEqual(parse_one("SELECT col FROM x").sql(), "SELECT\n col\nFROM x") - self.assertEqual( - parse_one("SELECT col FROM x").sql(pretty=True), "SELECT\n col\nFROM x" - ) + self.assertEqual(parse_one("SELECT col FROM x").sql(pretty=True), "SELECT\n col\nFROM x") @patch("sqlglot.parser.logger") def test_comment_error_n(self, logger): diff --git a/tests/test_transpile.py b/tests/test_transpile.py index 28bcc7a..4bec2ac 100644 --- a/tests/test_transpile.py +++ b/tests/test_transpile.py @@ -42,6 +42,20 @@ class TestTranspile(unittest.TestCase): "SELECT * FROM x WHERE a = ANY (SELECT 1)", ) + def test_leading_comma(self): + self.validate( + "SELECT FOO, BAR, BAZ", + "SELECT\n FOO\n , BAR\n , BAZ", + leading_comma=True, + pretty=True, + ) + # without pretty, this should be a no-op + self.validate( + "SELECT FOO, BAR, BAZ", + "SELECT FOO, BAR, BAZ", + leading_comma=True, + ) + def test_space(self): self.validate("SELECT MIN(3)>MIN(2)", "SELECT MIN(3) > MIN(2)") self.validate("SELECT MIN(3)>=MIN(2)", "SELECT MIN(3) >= MIN(2)") @@ -108,6 +122,11 @@ class TestTranspile(unittest.TestCase): "extract(month from '2021-01-31'::timestamp without time zone)", "EXTRACT(month FROM CAST('2021-01-31' AS TIMESTAMP))", ) + self.validate("extract(week from current_date + 2)", "EXTRACT(week FROM CURRENT_DATE + 2)") + self.validate( + "EXTRACT(minute FROM datetime1 - datetime2)", + "EXTRACT(minute FROM datetime1 - datetime2)", + ) def test_if(self): self.validate( @@ -122,18 +141,14 @@ class TestTranspile(unittest.TestCase): "SELECT IF a > 1 THEN b ELSE c END", "SELECT CASE WHEN a > 1 THEN b ELSE c END", ) - self.validate( - "SELECT IF(a > 1, 1) FROM foo", "SELECT CASE WHEN a > 1 THEN 1 END FROM foo" - ) + self.validate("SELECT IF(a > 1, 1) FROM foo", "SELECT CASE WHEN a > 1 THEN 1 END FROM foo") def test_ignore_nulls(self): self.validate("SELECT COUNT(x RESPECT NULLS)", "SELECT COUNT(x)") def test_time(self): self.validate("TIMESTAMP '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMP)") - self.validate( - "TIMESTAMP WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ)" - ) + self.validate("TIMESTAMP WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ)") self.validate( "TIMESTAMP(9) WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ(9))", @@ -159,9 +174,7 @@ class TestTranspile(unittest.TestCase): self.validate("DATE '2020-01-01'", "CAST('2020-01-01' AS DATE)") self.validate("'2020-01-01'::DATE", "CAST('2020-01-01' AS DATE)") self.validate("STR_TO_TIME('x', 'y')", "STRPTIME('x', 'y')", write="duckdb") - self.validate( - "STR_TO_UNIX('x', 'y')", "EPOCH(STRPTIME('x', 'y'))", write="duckdb" - ) + self.validate("STR_TO_UNIX('x', 'y')", "EPOCH(STRPTIME('x', 'y'))", write="duckdb") self.validate("TIME_TO_STR(x, 'y')", "STRFTIME(x, 'y')", write="duckdb") 
self.validate("TIME_TO_UNIX(x)", "EPOCH(x)", write="duckdb") self.validate( @@ -209,12 +222,8 @@ class TestTranspile(unittest.TestCase): self.validate("TIME_STR_TO_DATE(x)", "TIME_STR_TO_DATE(x)", write=None) self.validate("TIME_STR_TO_DATE(x)", "TO_DATE(x)", write="hive") - self.validate( - "UNIX_TO_STR(x, 'yyyy-MM-dd HH:mm:ss')", "FROM_UNIXTIME(x)", write="hive" - ) - self.validate( - "STR_TO_UNIX(x, 'yyyy-MM-dd HH:mm:ss')", "UNIX_TIMESTAMP(x)", write="hive" - ) + self.validate("UNIX_TO_STR(x, 'yyyy-MM-dd HH:mm:ss')", "FROM_UNIXTIME(x)", write="hive") + self.validate("STR_TO_UNIX(x, 'yyyy-MM-dd HH:mm:ss')", "UNIX_TIMESTAMP(x)", write="hive") self.validate("IF(x > 1, x + 1)", "IF(x > 1, x + 1)", write="presto") self.validate("IF(x > 1, 1 + 1)", "IF(x > 1, 1 + 1)", write="hive") self.validate("IF(x > 1, 1, 0)", "IF(x > 1, 1, 0)", write="hive") @@ -232,9 +241,7 @@ class TestTranspile(unittest.TestCase): ) self.validate("STR_TO_TIME('x', 'y')", "DATE_PARSE('x', 'y')", write="presto") - self.validate( - "STR_TO_UNIX('x', 'y')", "TO_UNIXTIME(DATE_PARSE('x', 'y'))", write="presto" - ) + self.validate("STR_TO_UNIX('x', 'y')", "TO_UNIXTIME(DATE_PARSE('x', 'y'))", write="presto") self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="presto") self.validate("TIME_TO_UNIX(x)", "TO_UNIXTIME(x)", write="presto") self.validate( @@ -245,9 +252,7 @@ class TestTranspile(unittest.TestCase): self.validate("UNIX_TO_TIME(123)", "FROM_UNIXTIME(123)", write="presto") self.validate("STR_TO_TIME('x', 'y')", "TO_TIMESTAMP('x', 'y')", write="spark") - self.validate( - "STR_TO_UNIX('x', 'y')", "UNIX_TIMESTAMP('x', 'y')", write="spark" - ) + self.validate("STR_TO_UNIX('x', 'y')", "UNIX_TIMESTAMP('x', 'y')", write="spark") self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="spark") self.validate( @@ -283,9 +288,7 @@ class TestTranspile(unittest.TestCase): def test_partial(self): for sql in load_sql_fixtures("partial.sql"): with self.subTest(sql): - self.assertEqual( - transpile(sql, error_level=ErrorLevel.IGNORE)[0], sql.strip() - ) + self.assertEqual(transpile(sql, error_level=ErrorLevel.IGNORE)[0], sql.strip()) def test_pretty(self): for _, sql, pretty in load_sql_fixture_pairs("pretty.sql"): -- cgit v1.2.3