summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2022-09-30 05:07:13 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2022-09-30 05:07:13 +0000
commit: 276f5723c8ab7e0b2938693db546dbd30be3f01a (patch)
tree: e6294c63de34a03e373245ec4cb1efbca1edfe61 /tests
parent: Adding upstream version 6.2.1. (diff)
download: sqlglot-276f5723c8ab7e0b2938693db546dbd30be3f01a.tar.xz
download: sqlglot-276f5723c8ab7e0b2938693db546dbd30be3f01a.zip
Adding upstream version 6.2.6. (ref: upstream/6.2.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- tests/dialects/test_bigquery.py | 21
-rw-r--r-- tests/dialects/test_dialect.py | 17
-rw-r--r-- tests/dialects/test_duckdb.py | 8
-rw-r--r-- tests/dialects/test_hive.py | 10
-rw-r--r-- tests/dialects/test_mysql.py | 36
-rw-r--r-- tests/dialects/test_presto.py | 27
-rw-r--r-- tests/dialects/test_snowflake.py | 73
-rw-r--r-- tests/dialects/test_spark.py | 14
-rw-r--r-- tests/dialects/test_tsql.py | 18
-rw-r--r-- tests/fixtures/identity.sql | 24
-rw-r--r-- tests/fixtures/optimizer/eliminate_subqueries.sql | 121
-rw-r--r-- tests/fixtures/optimizer/merge_subqueries.sql (renamed from tests/fixtures/optimizer/merge_derived_tables.sql) | 48
-rw-r--r-- tests/fixtures/optimizer/optimizer.sql | 34
-rw-r--r-- tests/fixtures/optimizer/qualify_columns.sql | 10
-rw-r--r-- tests/fixtures/optimizer/qualify_columns__invalid.sql | 1
-rw-r--r-- tests/fixtures/optimizer/tpc-h/tpc-h.sql | 290
-rw-r--r-- tests/fixtures/pretty.sql | 19
-rw-r--r-- tests/test_build.py | 14
-rw-r--r-- tests/test_expressions.py | 33
-rw-r--r-- tests/test_optimizer.py | 125
-rw-r--r-- tests/test_parser.py | 5
-rw-r--r-- tests/test_transforms.py | 25
22 files changed, 680 insertions, 293 deletions
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index 1337c3d..c929e59 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -236,3 +236,24 @@ class TestBigQuery(Validator):
"snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10",
},
)
+ self.validate_all(
+ "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
+ write={
+ "spark": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
+ "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
+ "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
+ write={
+ "bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
+ },
+ )
+
+ def test_user_defined_functions(self):
+ self.validate_identity(
+ "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 LANGUAGE js AS 'return x*y;'"
+ )
+ self.validate_identity("CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y)")
+ self.validate_identity("CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE <q STRING, r INT64> AS SELECT s, t")
diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py
index 4e0a3c6..e0ec824 100644
--- a/tests/dialects/test_dialect.py
+++ b/tests/dialects/test_dialect.py
@@ -13,9 +13,6 @@ from sqlglot import (
class Validator(unittest.TestCase):
dialect = None
- def validate(self, sql, target, **kwargs):
- self.assertEqual(transpile(sql, **kwargs)[0], target)
-
def validate_identity(self, sql):
self.assertEqual(transpile(sql, read=self.dialect, write=self.dialect)[0], sql)
@@ -258,6 +255,7 @@ class TestDialect(Validator):
"duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%M-%d'))",
"hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')",
"presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%i-%d'))",
+ "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%i-%d')",
},
)
self.validate_all(
@@ -266,6 +264,7 @@ class TestDialect(Validator):
"duckdb": "CAST('2020-01-01' AS DATE)",
"hive": "TO_DATE('2020-01-01')",
"presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')",
+ "starrocks": "TO_DATE('2020-01-01')",
},
)
self.validate_all(
@@ -341,6 +340,7 @@ class TestDialect(Validator):
"duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), y)",
"hive": "FROM_UNIXTIME(x, y)",
"presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)",
+ "starrocks": "FROM_UNIXTIME(x, y)",
},
)
self.validate_all(
@@ -349,6 +349,7 @@ class TestDialect(Validator):
"duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))",
"hive": "FROM_UNIXTIME(x)",
"presto": "FROM_UNIXTIME(x)",
+ "starrocks": "FROM_UNIXTIME(x)",
},
)
self.validate_all(
@@ -841,9 +842,19 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "POSITION(' ' in x)",
+ write={
+ "duckdb": "STRPOS(x, ' ')",
+ "postgres": "STRPOS(x, ' ')",
+ "presto": "STRPOS(x, ' ')",
+ "spark": "LOCATE(' ', x)",
+ },
+ )
+ self.validate_all(
"STR_POSITION(x, 'a')",
write={
"duckdb": "STRPOS(x, 'a')",
+ "postgres": "STRPOS(x, 'a')",
"presto": "STRPOS(x, 'a')",
"spark": "LOCATE('a', x)",
},
diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py
index f52decb..96e51df 100644
--- a/tests/dialects/test_duckdb.py
+++ b/tests/dialects/test_duckdb.py
@@ -1,3 +1,4 @@
+from sqlglot import ErrorLevel, UnsupportedError, transpile
from tests.dialects.test_dialect import Validator
@@ -250,3 +251,10 @@ class TestDuckDB(Validator):
"spark": "MONTH('2021-03-01')",
},
)
+
+ with self.assertRaises(UnsupportedError):
+ transpile(
+ "SELECT a FROM b PIVOT(SUM(x) FOR y IN ('z', 'q'))",
+ read="duckdb",
+ unsupported_level=ErrorLevel.IMMEDIATE,
+ )
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index a9b5168..d335921 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -127,17 +127,17 @@ class TestHive(Validator):
def test_ddl(self):
self.validate_all(
- "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
+ "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
write={
- "presto": "CREATE TABLE test WITH (FORMAT = 'parquet', x = '1', Z = '2') AS SELECT 1",
- "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
- "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
+ "presto": "CREATE TABLE test WITH (FORMAT='parquet', x='1', Z='2') AS SELECT 1",
+ "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
+ "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
},
)
self.validate_all(
"CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
write={
- "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
+ "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
},
diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py
index 87a3d64..02dc1ad 100644
--- a/tests/dialects/test_mysql.py
+++ b/tests/dialects/test_mysql.py
@@ -119,3 +119,39 @@ class TestMySQL(Validator):
"sqlite": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC, '')",
},
)
+ self.validate_identity(
+ "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
+ )
+ self.validate_identity(
+ "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
+ )
+ self.validate_identity(
+ "CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
+ )
+
+ self.validate_all(
+ """
+ CREATE TABLE `t_customer_account` (
+ "id" int(11) NOT NULL AUTO_INCREMENT,
+ "customer_id" int(11) DEFAULT NULL COMMENT '客户id',
+ "bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
+ "account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
+ PRIMARY KEY ("id")
+ ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表'
+ """,
+ write={
+ "mysql": """CREATE TABLE `t_customer_account` (
+ 'id' INT(11) NOT NULL AUTO_INCREMENT,
+ 'customer_id' INT(11) DEFAULT NULL COMMENT '客户id',
+ 'bank' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
+ 'account_no' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
+ PRIMARY KEY('id')
+)
+ENGINE=InnoDB
+AUTO_INCREMENT=1
+DEFAULT CHARACTER SET=utf8
+COLLATE=utf8_bin
+COMMENT='客户账户表'"""
+ },
+ pretty=True,
+ )
diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py
index 96c299d..b0d9ad9 100644
--- a/tests/dialects/test_presto.py
+++ b/tests/dialects/test_presto.py
@@ -171,7 +171,7 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
write={
- "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
+ "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
},
@@ -179,15 +179,15 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
write={
- "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
- "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1",
- "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1",
+ "presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1",
+ "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
+ "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
},
)
self.validate_all(
- "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
+ "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
write={
- "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
+ "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
},
@@ -195,9 +195,9 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
write={
- "presto": "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
- "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y",
- "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y",
+ "presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y",
+ "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
+ "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
},
)
self.validate_all(
@@ -217,11 +217,12 @@ class TestPresto(Validator):
},
)
- self.validate(
+ self.validate_all(
"SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
- "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
- read="presto",
- write="presto",
+ write={
+ "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
+ "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST",
+ },
)
def test_quotes(self):
diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py
index 165f8e2..b7e39a7 100644
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -143,6 +143,31 @@ class TestSnowflake(Validator):
"snowflake": r"SELECT 'a \' \\ \\t \\x21 z $ '",
},
)
+ self.validate_identity("SELECT REGEXP_LIKE(a, b, c)")
+ self.validate_all(
+ "SELECT RLIKE(a, b)",
+ write={
+ "snowflake": "SELECT REGEXP_LIKE(a, b)",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)",
+ write={
+ "snowflake": "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM test pivot",
+ write={
+ "snowflake": "SELECT a FROM test AS pivot",
+ },
+ )
+ self.validate_all(
+ "SELECT a FROM test unpivot",
+ write={
+ "snowflake": "SELECT a FROM test AS unpivot",
+ },
+ )
def test_null_treatment(self):
self.validate_all(
@@ -220,3 +245,51 @@ class TestSnowflake(Validator):
"snowflake": "SELECT EXTRACT(month FROM CAST(a AS DATETIME))",
},
)
+
+ def test_semi_structured_types(self):
+ self.validate_identity("SELECT CAST(a AS VARIANT)")
+ self.validate_all(
+ "SELECT a::VARIANT",
+ write={
+ "snowflake": "SELECT CAST(a AS VARIANT)",
+ "tsql": "SELECT CAST(a AS SQL_VARIANT)",
+ },
+ )
+ self.validate_identity("SELECT CAST(a AS ARRAY)")
+ self.validate_all(
+ "ARRAY_CONSTRUCT(0, 1, 2)",
+ write={
+ "snowflake": "[0, 1, 2]",
+ "bigquery": "[0, 1, 2]",
+ "duckdb": "LIST_VALUE(0, 1, 2)",
+ "presto": "ARRAY[0, 1, 2]",
+ "spark": "ARRAY(0, 1, 2)",
+ },
+ )
+ self.validate_all(
+ "SELECT a::OBJECT",
+ write={
+ "snowflake": "SELECT CAST(a AS OBJECT)",
+ },
+ )
+
+ def test_ddl(self):
+ self.validate_identity(
+ "CREATE TABLE a (x DATE, y BIGINT) WITH (PARTITION BY (x), integration='q', auto_refresh=TRUE, file_format=(type = parquet))"
+ )
+ self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x")
+
+ def test_user_defined_functions(self):
+ self.validate_all(
+ "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$",
+ write={
+ "snowflake": "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS ' SELECT 1 '",
+ },
+ )
+ self.validate_all(
+ "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'",
+ write={
+ "snowflake": "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'",
+ "bigquery": "CREATE TABLE FUNCTION a() RETURNS TABLE <b INT64> AS SELECT 1",
+ },
+ )
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
index 22f6947..8377e47 100644
--- a/tests/dialects/test_spark.py
+++ b/tests/dialects/test_spark.py
@@ -34,7 +34,7 @@ class TestSpark(Validator):
self.validate_all(
"CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
write={
- "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY = ARRAY['MONTHS'])",
+ "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
"hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
"spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
},
@@ -42,7 +42,7 @@ class TestSpark(Validator):
self.validate_all(
"CREATE TABLE test STORED AS PARQUET AS SELECT 1",
write={
- "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
+ "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
},
@@ -56,9 +56,9 @@ class TestSpark(Validator):
)
COMMENT='Test comment: blah'
WITH (
- PARTITIONED_BY = ARRAY['date'],
- FORMAT = 'ICEBERG',
- x = '1'
+ PARTITIONED_BY=ARRAY['date'],
+ FORMAT='ICEBERG',
+ x='1'
)""",
"hive": """CREATE TABLE blah (
col_a INT
@@ -69,7 +69,7 @@ PARTITIONED BY (
)
STORED AS ICEBERG
TBLPROPERTIES (
- 'x' = '1'
+ 'x'='1'
)""",
"spark": """CREATE TABLE blah (
col_a INT
@@ -80,7 +80,7 @@ PARTITIONED BY (
)
USING ICEBERG
TBLPROPERTIES (
- 'x' = '1'
+ 'x'='1'
)""",
},
pretty=True,
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index 0619eaa..6b0b39b 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -15,6 +15,14 @@ class TestTSQL(Validator):
},
)
+ self.validate_all(
+ "CONVERT(INT, CONVERT(NUMERIC, '444.75'))",
+ write={
+ "mysql": "CAST(CAST('444.75' AS DECIMAL) AS INT)",
+ "tsql": "CAST(CAST('444.75' AS NUMERIC) AS INTEGER)",
+ },
+ )
+
def test_types(self):
self.validate_identity("CAST(x AS XML)")
self.validate_identity("CAST(x AS UNIQUEIDENTIFIER)")
@@ -24,3 +32,13 @@ class TestTSQL(Validator):
self.validate_identity("CAST(x AS IMAGE)")
self.validate_identity("CAST(x AS SQL_VARIANT)")
self.validate_identity("CAST(x AS BIT)")
+ self.validate_all(
+ "CAST(x AS DATETIME2)",
+ read={
+ "": "CAST(x AS DATETIME)",
+ },
+ write={
+ "mysql": "CAST(x AS DATETIME)",
+ "tsql": "CAST(x AS DATETIME2)",
+ },
+ )
diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql
index 1b4168c..2654be1 100644
--- a/tests/fixtures/identity.sql
+++ b/tests/fixtures/identity.sql
@@ -8,6 +8,7 @@ SUM(CASE WHEN x > 1 THEN 1 ELSE 0 END) / y
1.1E10
1.12e-10
-11.023E7 * 3
+0.2
(1 * 2) / (3 - 5)
((TRUE))
''
@@ -167,7 +168,7 @@ SELECT LEAD(a) OVER (ORDER BY b) AS a
SELECT LEAD(a, 1) OVER (PARTITION BY a ORDER BY a) AS x
SELECT LEAD(a, 1, b) OVER (PARTITION BY a ORDER BY a) AS x
SELECT X((a, b) -> a + b, z -> z) AS x
-SELECT X(a -> "a" + ("z" - 1))
+SELECT X(a -> a + ("z" - 1))
SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)
SELECT test.* FROM test
SELECT a AS b FROM test
@@ -258,15 +259,24 @@ SELECT a FROM test TABLESAMPLE(100)
SELECT a FROM test TABLESAMPLE(100 ROWS)
SELECT a FROM test TABLESAMPLE BERNOULLI (50)
SELECT a FROM test TABLESAMPLE SYSTEM (75)
+SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q'))
+SELECT a FROM test PIVOT(SOMEAGG(x, y, z) FOR q IN (1))
+SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) PIVOT(MAX(b) FOR c IN ('d'))
+SELECT a FROM (SELECT a, b FROM test) PIVOT(SUM(x) FOR y IN ('z', 'q'))
+SELECT a FROM test UNPIVOT(x FOR y IN (z, q)) AS x
+SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE(0.1)
+SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) UNPIVOT(x FOR y IN (z, q)) AS x
SELECT ABS(a) FROM test
SELECT AVG(a) FROM test
SELECT CEIL(a) FROM test
+SELECT CEIL(a, b) FROM test
SELECT COUNT(a) FROM test
SELECT COUNT(1) FROM test
SELECT COUNT(*) FROM test
SELECT COUNT(DISTINCT a) FROM test
SELECT EXP(a) FROM test
SELECT FLOOR(a) FROM test
+SELECT FLOOR(a, b) FROM test
SELECT FIRST(a) FROM test
SELECT GREATEST(a, b, c) FROM test
SELECT LAST(a) FROM test
@@ -299,6 +309,7 @@ SELECT CAST(a AS MAP<INT, INT>) FROM test
SELECT CAST(a AS TIMESTAMP) FROM test
SELECT CAST(a AS DATE) FROM test
SELECT CAST(a AS ARRAY<INT>) FROM test
+SELECT CAST(a AS VARIANT) FROM test
SELECT TRY_CAST(a AS INT) FROM test
SELECT COALESCE(a, b, c) FROM test
SELECT IFNULL(a, b) FROM test
@@ -442,13 +453,10 @@ CREATE TABLE z (a INT(11) DEFAULT NULL COMMENT '客户id')
CREATE TABLE z (a INT(11) NOT NULL DEFAULT 1)
CREATE TABLE z (a INT(11) NOT NULL COLLATE utf8_bin AUTO_INCREMENT)
CREATE TABLE z (a INT, PRIMARY KEY(a))
-CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
-CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
-CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1
-CREATE TABLE z WITH (FORMAT='ORC', x = '2') AS SELECT 1
+CREATE TABLE z WITH (FORMAT='ORC', x='2') AS SELECT 1
CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='parquet') AS SELECT 1
-CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x = '2') AS SELECT 1
+CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x='2') AS SELECT 1
CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT, y INT))
CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT)) AS SELECT 1
CREATE TABLE z AS (WITH cte AS (SELECT 1) SELECT * FROM cte)
@@ -460,6 +468,9 @@ CREATE TEMPORARY FUNCTION f
CREATE TEMPORARY FUNCTION f AS 'g'
CREATE FUNCTION f
CREATE FUNCTION f AS 'g'
+CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1'
+CREATE FUNCTION a() LANGUAGE sql
+CREATE FUNCTION a() LANGUAGE sql RETURNS INT
CREATE INDEX abc ON t (a)
CREATE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX abc ON t (a, b, b)
@@ -519,3 +530,4 @@ WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS
WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a
SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z
SELECT ((SELECT 1) + 1)
+SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES
diff --git a/tests/fixtures/optimizer/eliminate_subqueries.sql b/tests/fixtures/optimizer/eliminate_subqueries.sql
index aae5f2a..f395c0a 100644
--- a/tests/fixtures/optimizer/eliminate_subqueries.sql
+++ b/tests/fixtures/optimizer/eliminate_subqueries.sql
@@ -1,42 +1,79 @@
-SELECT 1 AS x, 2 AS y
-UNION ALL
-SELECT 1 AS x, 2 AS y;
-WITH _e_0 AS (
- SELECT
- 1 AS x,
- 2 AS y
-)
-SELECT
- *
-FROM _e_0
-UNION ALL
-SELECT
- *
-FROM _e_0;
-
-SELECT x.id
-FROM (
- SELECT *
- FROM x AS x
- JOIN y AS y
- ON x.id = y.id
-) AS x
-JOIN (
- SELECT *
- FROM x AS x
- JOIN y AS y
- ON x.id = y.id
-) AS y
-ON x.id = y.id;
-WITH _e_0 AS (
- SELECT
- *
- FROM x AS x
- JOIN y AS y
- ON x.id = y.id
-)
-SELECT
- x.id
-FROM "_e_0" AS x
-JOIN "_e_0" AS y
- ON x.id = y.id;
+-- No derived tables
+SELECT * FROM x;
+SELECT * FROM x;
+
+-- Unaliased derived tables
+SELECT a FROM (SELECT b FROM (SELECT c FROM x));
+WITH cte AS (SELECT c FROM x), cte_2 AS (SELECT b FROM cte AS cte) SELECT a FROM cte_2 AS cte_2;
+
+-- Joined derived table inside nested derived table
+SELECT b FROM (SELECT b FROM (SELECT b FROM x JOIN (SELECT b FROM y) AS y ON x.b = y.b));
+WITH y_2 AS (SELECT b FROM y), cte AS (SELECT b FROM x JOIN y_2 AS y ON x.b = y.b), cte_2 AS (SELECT b FROM cte AS cte) SELECT b FROM cte_2 AS cte_2;
+
+-- Aliased derived tables
+SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS z;
+WITH y AS (SELECT c FROM x), z AS (SELECT b FROM y AS y) SELECT a FROM z AS z;
+
+-- Existing CTEs
+WITH q AS (SELECT c FROM x) SELECT a FROM (SELECT b FROM q AS y) AS z;
+WITH q AS (SELECT c FROM x), z AS (SELECT b FROM q AS y) SELECT a FROM z AS z;
+
+-- Derived table inside CTE
+WITH x AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM x;
+WITH y AS (SELECT a FROM x), x AS (SELECT a FROM y AS y) SELECT a FROM x;
+
+-- Name conflicts with existing outer derived table
+SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS y;
+WITH y AS (SELECT c FROM x), y_2 AS (SELECT b FROM y AS y) SELECT a FROM y_2 AS y;
+
+-- Name conflicts with outer join
+SELECT a, b FROM (SELECT c FROM (SELECT d FROM x) AS x) AS y JOIN x ON x.a = y.a;
+WITH x_2 AS (SELECT d FROM x), y AS (SELECT c FROM x_2 AS x) SELECT a, b FROM y AS y JOIN x ON x.a = y.a;
+
+-- Name conflicts with table name that is selected in another branch
+SELECT * FROM (SELECT * FROM (SELECT a FROM x) AS x) AS y JOIN (SELECT * FROM x) AS z ON x.a = y.a;
+WITH x_2 AS (SELECT a FROM x), y AS (SELECT * FROM x_2 AS x), z AS (SELECT * FROM x) SELECT * FROM y AS y JOIN z AS z ON x.a = y.a;
+
+-- Name conflicts with table alias
+SELECT a FROM (SELECT a FROM (SELECT a FROM x) AS y) AS z JOIN q AS y;
+WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z JOIN q AS y;
+
+-- Name conflicts with existing CTE
+WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y;
+WITH y_2 AS (SELECT a FROM x), y AS (SELECT a FROM y_2 AS y) SELECT a FROM y;
+
+-- Union
+SELECT 1 AS x, 2 AS y UNION ALL SELECT 1 AS x, 2 AS y;
+WITH cte AS (SELECT 1 AS x, 2 AS y) SELECT cte.x AS x, cte.y AS y FROM cte AS cte UNION ALL SELECT cte.x AS x, cte.y AS y FROM cte AS cte;
+
+-- Union of selects with derived tables
+(SELECT a FROM (SELECT b FROM x)) UNION (SELECT a FROM (SELECT b FROM y));
+WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT a FROM cte AS cte), cte_3 AS (SELECT b FROM y), cte_4 AS (SELECT a FROM cte_3 AS cte_3) (SELECT cte_2.a AS a FROM cte_2 AS cte_2) UNION (SELECT cte_4.a AS a FROM cte_4 AS cte_4);
+
+-- Subquery
+SELECT a FROM x WHERE b = (SELECT y.c FROM y);
+SELECT a FROM x WHERE b = (SELECT y.c FROM y);
+
+-- Correlated subquery
+SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
+SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
+
+-- Duplicate CTE
+SELECT a FROM (SELECT b FROM x) AS y JOIN (SELECT b FROM x) AS z;
+WITH y AS (SELECT b FROM x) SELECT a FROM y AS y JOIN y AS z;
+
+-- Doubly duplicate CTE
+SELECT * FROM (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS z JOIN (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS q;
+WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x JOIN y AS y) SELECT * FROM z AS z JOIN z AS q;
+
+-- Another duplicate...
+SELECT x.id FROM (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS x JOIN (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS y ON x.id = y.id;
+WITH x_2 AS (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) SELECT x.id FROM x_2 AS x JOIN x_2 AS y ON x.id = y.id;
+
+-- Root subquery
+(SELECT * FROM (SELECT * FROM x)) LIMIT 1;
+(WITH cte AS (SELECT * FROM x) SELECT * FROM cte AS cte) LIMIT 1;
+
+-- Existing duplicate CTE
+WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y JOIN y AS z;
+WITH y AS (SELECT a FROM x) SELECT a FROM y AS y JOIN y AS z;
diff --git a/tests/fixtures/optimizer/merge_derived_tables.sql b/tests/fixtures/optimizer/merge_subqueries.sql
index c5aa7e9..35aed3b 100644
--- a/tests/fixtures/optimizer/merge_derived_tables.sql
+++ b/tests/fixtures/optimizer/merge_subqueries.sql
@@ -18,6 +18,14 @@ SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a;
SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
+-- Outer query has join
+SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
+SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
+
+# leave_tables_isolated: true
+SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
+SELECT x.a AS a, y.c AS c FROM (SELECT x.a AS a, x.b AS b FROM x AS x WHERE x.a > 1) AS x JOIN y AS y ON x.b = y.b;
+
-- Join on derived table
SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b;
@@ -42,13 +50,9 @@ SELECT q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2
SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b;
SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b;
--- WHERE clause in joined derived table is merged
+-- WHERE clause in joined derived table is merged to ON clause
SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y;
-SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y WHERE y.c > 1;
-
--- WHERE clause in outer joined derived table is merged to ON clause
-SELECT x.a, y.c FROM x LEFT JOIN (SELECT b, c FROM y WHERE c > 1) AS y;
-SELECT x.a AS a, y.c AS c FROM x AS x LEFT JOIN y AS y ON y.c > 1;
+SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON y.c > 1;
-- Comma JOIN in outer query
SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y;
@@ -61,3 +65,35 @@ SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z;
-- (Regression) Column in ORDER BY
SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1;
SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1;
+
+-- CTE
+WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x;
+SELECT x.a AS a, x.b AS b FROM x AS x;
+
+-- CTE with outer table alias
+WITH y AS (SELECT a, b FROM x) SELECT a, b FROM y AS z;
+SELECT x.a AS a, x.b AS b FROM x AS x;
+
+-- Nested CTE
+WITH x AS (SELECT a FROM x), x2 AS (SELECT a FROM x) SELECT a FROM x2;
+SELECT x.a AS a FROM x AS x;
+
+-- CTE WHERE clause is merged
+WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, SUM(b) FROM x GROUP BY a;
+SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a;
+
+-- CTE Outer query has join
+WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, c FROM x AS x JOIN y ON x.b = y.b;
+SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
+
+-- CTE with inner table alias
+WITH y AS (SELECT a, b FROM x AS q) SELECT a, b FROM y AS z;
+SELECT q.a AS a, q.b AS b FROM x AS q;
+
+-- Duplicate queries to CTE
+WITH x AS (SELECT a, b FROM x) SELECT x.a, y.b FROM x JOIN x AS y;
+WITH x AS (SELECT x.a AS a, x.b AS b FROM x AS x) SELECT x.a AS a, y.b AS b FROM x JOIN x AS y;
+
+-- Nested CTE
+SELECT * FROM (WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x);
+SELECT x.a AS a, x.b AS b FROM x AS x;
diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql
index f1d0f7d..0bb742b 100644
--- a/tests/fixtures/optimizer/optimizer.sql
+++ b/tests/fixtures/optimizer/optimizer.sql
@@ -65,18 +65,14 @@ WITH "cte1" AS (
SELECT
"x"."a" AS "a"
FROM "x" AS "x"
-), "cte2" AS (
- SELECT
- "cte1"."a" + 1 AS "a"
- FROM "cte1"
)
SELECT
"cte1"."a" AS "a"
FROM "cte1"
UNION ALL
SELECT
- "cte2"."a" AS "a"
-FROM "cte2";
+ "cte1"."a" + 1 AS "a"
+FROM "cte1";
SELECT a, SUM(b)
FROM (
@@ -86,18 +82,19 @@ FROM (
) d
WHERE (TRUE AND TRUE OR 'a' = 'b') AND a > 1
GROUP BY a;
-SELECT
- "x"."a" AS "a",
- SUM("y"."b") AS "_col_1"
-FROM "x" AS "x"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
MAX("y"."b") AS "_col_0",
"y"."a" AS "_u_1"
FROM "y" AS "y"
GROUP BY
"y"."a"
-) AS "_u_0"
+)
+SELECT
+ "x"."a" AS "a",
+ SUM("y"."b") AS "_col_1"
+FROM "x" AS "x"
+LEFT JOIN "_u_0" AS "_u_0"
ON "x"."a" = "_u_0"."_u_1"
JOIN "y" AS "y"
ON "x"."a" = "y"."a"
@@ -127,3 +124,16 @@ LIMIT 1;
FROM "y" AS "y"
)
LIMIT 1;
+
+# dialect: spark
+SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b;
+SELECT /*+ BROADCAST(`y`) */
+ `x`.`b` AS `b`
+FROM `x` AS `x`
+JOIN `y` AS `y`
+ ON `x`.`b` = `y`.`b`;
+
+SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x;
+SELECT
+ AGGREGATE(ARRAY("x"."a", "x"."b"), 0, ("x", "acc") -> "x" + "acc" + "x"."a") AS "sum_agg"
+FROM "x" AS "x";
diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql
index 004c57c..f848e7a 100644
--- a/tests/fixtures/optimizer/qualify_columns.sql
+++ b/tests/fixtures/optimizer/qualify_columns.sql
@@ -69,6 +69,9 @@ SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x
SELECT x.b, x.a FROM x LEFT JOIN y ON x.b = y.b QUALIFY ROW_NUMBER() OVER(PARTITION BY x.b ORDER BY x.a DESC) = 1;
SELECT x.b AS b, x.a AS a FROM x AS x LEFT JOIN y AS y ON x.b = y.b QUALIFY ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a DESC) = 1;
+SELECT AGGREGATE(ARRAY(a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x;
+SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + x.a) AS sum_agg FROM x AS x;
+
--------------------------------------
-- Derived tables
--------------------------------------
@@ -231,3 +234,10 @@ SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b WHERE COALES
SELECT b FROM x JOIN y USING (b) JOIN z USING (b);
SELECT COALESCE(x.b, y.b, z.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b;
+
+--------------------------------------
+-- Hint with table reference
+--------------------------------------
+# dialect: spark
+SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b;
+SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b;
diff --git a/tests/fixtures/optimizer/qualify_columns__invalid.sql b/tests/fixtures/optimizer/qualify_columns__invalid.sql
index 056b0e9..1104b6e 100644
--- a/tests/fixtures/optimizer/qualify_columns__invalid.sql
+++ b/tests/fixtures/optimizer/qualify_columns__invalid.sql
@@ -5,7 +5,6 @@ SELECT z.* FROM x;
SELECT x FROM x;
INSERT INTO x VALUES (1, 2);
SELECT a FROM x AS z JOIN y AS z;
-WITH z AS (SELECT * FROM x) SELECT * FROM x AS z;
SELECT a FROM x JOIN (SELECT b FROM y WHERE y.b = x.c);
SELECT a FROM x AS y JOIN (SELECT a FROM y) AS q ON y.a = q.a;
SELECT q.a FROM (SELECT x.b FROM x) AS z JOIN (SELECT a FROM z) AS q ON z.b = q.a;
diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
index 0b6d382..d2f10fc 100644
--- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql
+++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
@@ -97,19 +97,32 @@ order by
p_partkey
limit
100;
-WITH "_e_0" AS (
+WITH "partsupp_2" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
-), "_e_1" AS (
+), "region_2" AS (
SELECT
"region"."r_regionkey" AS "r_regionkey",
"region"."r_name" AS "r_name"
FROM "region" AS "region"
WHERE
"region"."r_name" = 'EUROPE'
+), "_u_0" AS (
+ SELECT
+ MIN("partsupp"."ps_supplycost") AS "_col_0",
+ "partsupp"."ps_partkey" AS "_u_1"
+ FROM "partsupp_2" AS "partsupp"
+ CROSS JOIN "region_2" AS "region"
+ JOIN "nation" AS "nation"
+ ON "nation"."n_regionkey" = "region"."r_regionkey"
+ JOIN "supplier" AS "supplier"
+ ON "supplier"."s_nationkey" = "nation"."n_nationkey"
+ AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
+ GROUP BY
+ "partsupp"."ps_partkey"
)
SELECT
"supplier"."s_acctbal" AS "s_acctbal",
@@ -121,25 +134,12 @@ SELECT
"supplier"."s_phone" AS "s_phone",
"supplier"."s_comment" AS "s_comment"
FROM "part" AS "part"
-LEFT JOIN (
- SELECT
- MIN("partsupp"."ps_supplycost") AS "_col_0",
- "partsupp"."ps_partkey" AS "_u_1"
- FROM "_e_0" AS "partsupp"
- CROSS JOIN "_e_1" AS "region"
- JOIN "nation" AS "nation"
- ON "nation"."n_regionkey" = "region"."r_regionkey"
- JOIN "supplier" AS "supplier"
- ON "supplier"."s_nationkey" = "nation"."n_nationkey"
- AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
- GROUP BY
- "partsupp"."ps_partkey"
-) AS "_u_0"
+LEFT JOIN "_u_0" AS "_u_0"
ON "part"."p_partkey" = "_u_0"."_u_1"
-CROSS JOIN "_e_1" AS "region"
+CROSS JOIN "region_2" AS "region"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
-JOIN "_e_0" AS "partsupp"
+JOIN "partsupp_2" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
@@ -193,12 +193,12 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
+ AND "orders"."o_orderdate" < '1995-03-15'
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
+ AND "lineitem"."l_shipdate" > '1995-03-15'
WHERE
"customer"."c_mktsegment" = 'BUILDING'
- AND "lineitem"."l_shipdate" > '1995-03-15'
- AND "orders"."o_orderdate" < '1995-03-15'
GROUP BY
"lineitem"."l_orderkey",
"orders"."o_orderdate",
@@ -232,11 +232,7 @@ group by
o_orderpriority
order by
o_orderpriority;
-SELECT
- "orders"."o_orderpriority" AS "o_orderpriority",
- COUNT(*) AS "order_count"
-FROM "orders" AS "orders"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
"lineitem"."l_orderkey" AS "l_orderkey"
FROM "lineitem" AS "lineitem"
@@ -244,7 +240,12 @@ LEFT JOIN (
"lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
GROUP BY
"lineitem"."l_orderkey"
-) AS "_u_0"
+)
+SELECT
+ "orders"."o_orderpriority" AS "o_orderpriority",
+ COUNT(*) AS "order_count"
+FROM "orders" AS "orders"
+LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."l_orderkey" = "orders"."o_orderkey"
WHERE
"orders"."o_orderdate" < CAST('1993-10-01' AS DATE)
@@ -290,7 +291,10 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
-CROSS JOIN "region" AS "region"
+ AND "orders"."o_orderdate" < CAST('1995-01-01' AS DATE)
+ AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE)
+JOIN "region" AS "region"
+ ON "region"."r_name" = 'ASIA'
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "supplier" AS "supplier"
@@ -299,10 +303,6 @@ JOIN "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_suppkey" = "supplier"."s_suppkey"
-WHERE
- "orders"."o_orderdate" < CAST('1995-01-01' AS DATE)
- AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE)
- AND "region"."r_name" = 'ASIA'
GROUP BY
"nation"."n_name"
ORDER BY
@@ -371,7 +371,7 @@ order by
supp_nation,
cust_nation,
l_year;
-WITH "_e_0" AS (
+WITH "n1" AS (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name"
@@ -389,14 +389,15 @@ SELECT
)) AS "revenue"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
- ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
+ ON "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
+ AND "supplier"."s_suppkey" = "lineitem"."l_suppkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
JOIN "customer" AS "customer"
ON "customer"."c_custkey" = "orders"."o_custkey"
-JOIN "_e_0" AS "n1"
+JOIN "n1" AS "n1"
ON "supplier"."s_nationkey" = "n1"."n_nationkey"
-JOIN "_e_0" AS "n2"
+JOIN "n1" AS "n2"
ON "customer"."c_nationkey" = "n2"."n_nationkey"
AND (
"n1"."n_name" = 'FRANCE'
@@ -406,8 +407,6 @@ JOIN "_e_0" AS "n2"
"n1"."n_name" = 'GERMANY'
OR "n2"."n_name" = 'GERMANY'
)
-WHERE
- "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
GROUP BY
"n1"."n_name",
"n2"."n_name",
@@ -469,13 +468,15 @@ SELECT
1 - "lineitem"."l_discount"
)) AS "mkt_share"
FROM "part" AS "part"
-CROSS JOIN "region" AS "region"
+JOIN "region" AS "region"
+ ON "region"."r_name" = 'AMERICA'
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "nation"."n_nationkey"
JOIN "orders" AS "orders"
ON "orders"."o_custkey" = "customer"."c_custkey"
+ AND "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "part"."p_partkey" = "lineitem"."l_partkey"
@@ -484,9 +485,7 @@ JOIN "supplier" AS "supplier"
JOIN "nation" AS "nation_2"
ON "supplier"."s_nationkey" = "nation_2"."n_nationkey"
WHERE
- "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
- AND "part"."p_type" = 'ECONOMY ANODIZED STEEL'
- AND "region"."r_name" = 'AMERICA'
+ "part"."p_type" = 'ECONOMY ANODIZED STEEL'
GROUP BY
EXTRACT(year FROM "orders"."o_orderdate")
ORDER BY
@@ -604,14 +603,13 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
+ AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE)
+ AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE)
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
+ AND "lineitem"."l_returnflag" = 'R'
JOIN "nation" AS "nation"
ON "customer"."c_nationkey" = "nation"."n_nationkey"
-WHERE
- "lineitem"."l_returnflag" = 'R'
- AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE)
- AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE)
GROUP BY
"customer"."c_custkey",
"customer"."c_name",
@@ -654,12 +652,12 @@ group by
)
order by
value desc;
-WITH "_e_0" AS (
+WITH "supplier_2" AS (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_nationkey" AS "s_nationkey"
FROM "supplier" AS "supplier"
-), "_e_1" AS (
+), "nation_2" AS (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name"
@@ -671,9 +669,9 @@ SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") AS "value"
FROM "partsupp" AS "partsupp"
-JOIN "_e_0" AS "supplier"
+JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
-JOIN "_e_1" AS "nation"
+JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
GROUP BY
"partsupp"."ps_partkey"
@@ -682,9 +680,9 @@ HAVING
SELECT
SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") * 0.0001 AS "_col_0"
FROM "partsupp" AS "partsupp"
- JOIN "_e_0" AS "supplier"
+ JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
- JOIN "_e_1" AS "nation"
+ JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
)
ORDER BY
@@ -737,13 +735,12 @@ SELECT
END) AS "low_line_count"
FROM "orders" AS "orders"
JOIN "lineitem" AS "lineitem"
- ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
-WHERE
- "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
+ ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
AND "lineitem"."l_receiptdate" < CAST('1995-01-01' AS DATE)
AND "lineitem"."l_receiptdate" >= CAST('1994-01-01' AS DATE)
AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate"
AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP')
+ AND "orders"."o_orderkey" = "lineitem"."l_orderkey"
GROUP BY
"lineitem"."l_shipmode"
ORDER BY
@@ -772,10 +769,7 @@ group by
order by
custdist desc,
c_count desc;
-SELECT
- "c_orders"."c_count" AS "c_count",
- COUNT(*) AS "custdist"
-FROM (
+WITH "c_orders" AS (
SELECT
COUNT("orders"."o_orderkey") AS "c_count"
FROM "customer" AS "customer"
@@ -784,7 +778,11 @@ FROM (
AND NOT "orders"."o_comment" LIKE '%special%requests%'
GROUP BY
"customer"."c_custkey"
-) AS "c_orders"
+)
+SELECT
+ "c_orders"."c_count" AS "c_count",
+ COUNT(*) AS "custdist"
+FROM "c_orders" AS "c_orders"
GROUP BY
"c_orders"."c_count"
ORDER BY
@@ -920,13 +918,7 @@ order by
p_brand,
p_type,
p_size;
-SELECT
- "part"."p_brand" AS "p_brand",
- "part"."p_type" AS "p_type",
- "part"."p_size" AS "p_size",
- COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt"
-FROM "partsupp" AS "partsupp"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
"supplier"."s_suppkey" AS "s_suppkey"
FROM "supplier" AS "supplier"
@@ -934,15 +926,22 @@ LEFT JOIN (
"supplier"."s_comment" LIKE '%Customer%Complaints%'
GROUP BY
"supplier"."s_suppkey"
-) AS "_u_0"
+)
+SELECT
+ "part"."p_brand" AS "p_brand",
+ "part"."p_type" AS "p_type",
+ "part"."p_size" AS "p_size",
+ COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt"
+FROM "partsupp" AS "partsupp"
+LEFT JOIN "_u_0" AS "_u_0"
ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey"
JOIN "part" AS "part"
- ON "part"."p_partkey" = "partsupp"."ps_partkey"
-WHERE
- "_u_0"."s_suppkey" IS NULL
- AND "part"."p_brand" <> 'Brand#45'
+ ON "part"."p_brand" <> 'Brand#45'
+ AND "part"."p_partkey" = "partsupp"."ps_partkey"
AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9)
AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%'
+WHERE
+ "_u_0"."s_suppkey" IS NULL
GROUP BY
"part"."p_brand",
"part"."p_type",
@@ -973,24 +972,25 @@ where
where
l_partkey = p_partkey
);
-SELECT
- SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly"
-FROM "lineitem" AS "lineitem"
-JOIN "part" AS "part"
- ON "part"."p_partkey" = "lineitem"."l_partkey"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
0.2 * AVG("lineitem"."l_quantity") AS "_col_0",
"lineitem"."l_partkey" AS "_u_1"
FROM "lineitem" AS "lineitem"
GROUP BY
"lineitem"."l_partkey"
-) AS "_u_0"
+)
+SELECT
+ SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly"
+FROM "lineitem" AS "lineitem"
+JOIN "part" AS "part"
+ ON "part"."p_brand" = 'Brand#23'
+ AND "part"."p_container" = 'MED BOX'
+ AND "part"."p_partkey" = "lineitem"."l_partkey"
+LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "part"."p_partkey"
WHERE
"lineitem"."l_quantity" < "_u_0"."_col_0"
- AND "part"."p_brand" = 'Brand#23'
- AND "part"."p_container" = 'MED BOX'
AND NOT "_u_0"."_u_1" IS NULL;
--------------------------------------
@@ -1030,6 +1030,16 @@ order by
o_orderdate
limit
100;
+WITH "_u_0" AS (
+ SELECT
+ "lineitem"."l_orderkey" AS "l_orderkey"
+ FROM "lineitem" AS "lineitem"
+ GROUP BY
+ "lineitem"."l_orderkey",
+ "lineitem"."l_orderkey"
+ HAVING
+ SUM("lineitem"."l_quantity") > 300
+)
SELECT
"customer"."c_name" AS "c_name",
"customer"."c_custkey" AS "c_custkey",
@@ -1040,16 +1050,7 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
-LEFT JOIN (
- SELECT
- "lineitem"."l_orderkey" AS "l_orderkey"
- FROM "lineitem" AS "lineitem"
- GROUP BY
- "lineitem"."l_orderkey",
- "lineitem"."l_orderkey"
- HAVING
- SUM("lineitem"."l_quantity") > 300
-) AS "_u_0"
+LEFT JOIN "_u_0" AS "_u_0"
ON "orders"."o_orderkey" = "_u_0"."l_orderkey"
JOIN "lineitem" AS "lineitem"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
@@ -1200,38 +1201,34 @@ where
and n_name = 'CANADA'
order by
s_name;
-SELECT
- "supplier"."s_name" AS "s_name",
- "supplier"."s_address" AS "s_address"
-FROM "supplier" AS "supplier"
-LEFT JOIN (
+WITH "_u_0" AS (
+ SELECT
+ 0.5 * SUM("lineitem"."l_quantity") AS "_col_0",
+ "lineitem"."l_partkey" AS "_u_1",
+ "lineitem"."l_suppkey" AS "_u_2"
+ FROM "lineitem" AS "lineitem"
+ WHERE
+ "lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE)
+ AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE)
+ GROUP BY
+ "lineitem"."l_partkey",
+ "lineitem"."l_suppkey"
+), "_u_3" AS (
+ SELECT
+ "part"."p_partkey" AS "p_partkey"
+ FROM "part" AS "part"
+ WHERE
+ "part"."p_name" LIKE 'forest%'
+ GROUP BY
+ "part"."p_partkey"
+), "_u_4" AS (
SELECT
"partsupp"."ps_suppkey" AS "ps_suppkey"
FROM "partsupp" AS "partsupp"
- LEFT JOIN (
- SELECT
- 0.5 * SUM("lineitem"."l_quantity") AS "_col_0",
- "lineitem"."l_partkey" AS "_u_1",
- "lineitem"."l_suppkey" AS "_u_2"
- FROM "lineitem" AS "lineitem"
- WHERE
- "lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE)
- AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE)
- GROUP BY
- "lineitem"."l_partkey",
- "lineitem"."l_suppkey"
- ) AS "_u_0"
+ LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "partsupp"."ps_partkey"
AND "_u_0"."_u_2" = "partsupp"."ps_suppkey"
- LEFT JOIN (
- SELECT
- "part"."p_partkey" AS "p_partkey"
- FROM "part" AS "part"
- WHERE
- "part"."p_name" LIKE 'forest%'
- GROUP BY
- "part"."p_partkey"
- ) AS "_u_3"
+ LEFT JOIN "_u_3" AS "_u_3"
ON "partsupp"."ps_partkey" = "_u_3"."p_partkey"
WHERE
"partsupp"."ps_availqty" > "_u_0"."_col_0"
@@ -1240,13 +1237,18 @@ LEFT JOIN (
AND NOT "_u_3"."p_partkey" IS NULL
GROUP BY
"partsupp"."ps_suppkey"
-) AS "_u_4"
+)
+SELECT
+ "supplier"."s_name" AS "s_name",
+ "supplier"."s_address" AS "s_address"
+FROM "supplier" AS "supplier"
+LEFT JOIN "_u_4" AS "_u_4"
ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey"
JOIN "nation" AS "nation"
- ON "supplier"."s_nationkey" = "nation"."n_nationkey"
+ ON "nation"."n_name" = 'CANADA'
+ AND "supplier"."s_nationkey" = "nation"."n_nationkey"
WHERE
- "nation"."n_name" = 'CANADA'
- AND NOT "_u_4"."ps_suppkey" IS NULL
+ NOT "_u_4"."ps_suppkey" IS NULL
ORDER BY
"s_name";
@@ -1294,22 +1296,14 @@ order by
s_name
limit
100;
-SELECT
- "supplier"."s_name" AS "s_name",
- COUNT(*) AS "numwait"
-FROM "supplier" AS "supplier"
-JOIN "lineitem" AS "lineitem"
- ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
"l2"."l_orderkey" AS "l_orderkey",
ARRAY_AGG("l2"."l_suppkey") AS "_u_1"
FROM "lineitem" AS "l2"
GROUP BY
"l2"."l_orderkey"
-) AS "_u_0"
- ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey"
-LEFT JOIN (
+), "_u_2" AS (
SELECT
"l3"."l_orderkey" AS "l_orderkey",
ARRAY_AGG("l3"."l_suppkey") AS "_u_3"
@@ -1318,20 +1312,29 @@ LEFT JOIN (
"l3"."l_receiptdate" > "l3"."l_commitdate"
GROUP BY
"l3"."l_orderkey"
-) AS "_u_2"
+)
+SELECT
+ "supplier"."s_name" AS "s_name",
+ COUNT(*) AS "numwait"
+FROM "supplier" AS "supplier"
+JOIN "lineitem" AS "lineitem"
+ ON "lineitem"."l_receiptdate" > "lineitem"."l_commitdate"
+ AND "supplier"."s_suppkey" = "lineitem"."l_suppkey"
+LEFT JOIN "_u_0" AS "_u_0"
+ ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey"
+LEFT JOIN "_u_2" AS "_u_2"
ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
+ AND "orders"."o_orderstatus" = 'F'
JOIN "nation" AS "nation"
- ON "supplier"."s_nationkey" = "nation"."n_nationkey"
+ ON "nation"."n_name" = 'SAUDI ARABIA'
+ AND "supplier"."s_nationkey" = "nation"."n_nationkey"
WHERE
(
"_u_2"."l_orderkey" IS NULL
OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "lineitem"."l_suppkey")
)
- AND "lineitem"."l_receiptdate" > "lineitem"."l_commitdate"
- AND "nation"."n_name" = 'SAUDI ARABIA'
- AND "orders"."o_orderstatus" = 'F'
AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "lineitem"."l_suppkey")
AND NOT "_u_0"."l_orderkey" IS NULL
GROUP BY
@@ -1381,18 +1384,19 @@ group by
cntrycode
order by
cntrycode;
-SELECT
- SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode",
- COUNT(*) AS "numcust",
- SUM("customer"."c_acctbal") AS "totacctbal"
-FROM "customer" AS "customer"
-LEFT JOIN (
+WITH "_u_0" AS (
SELECT
"orders"."o_custkey" AS "_u_1"
FROM "orders" AS "orders"
GROUP BY
"orders"."o_custkey"
-) AS "_u_0"
+)
+SELECT
+ SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode",
+ COUNT(*) AS "numcust",
+ SUM("customer"."c_acctbal") AS "totacctbal"
+FROM "customer" AS "customer"
+LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "customer"."c_custkey"
WHERE
"_u_0"."_u_1" IS NULL
diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql
index 5ed74f4..19a7451 100644
--- a/tests/fixtures/pretty.sql
+++ b/tests/fixtures/pretty.sql
@@ -264,22 +264,3 @@ CREATE TABLE "t_customer_account" (
"account_no" VARCHAR(100)
);
-CREATE TABLE "t_customer_account" (
- "id" int(11) NOT NULL AUTO_INCREMENT,
- "customer_id" int(11) DEFAULT NULL COMMENT '客户id',
- "bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
- "account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
- PRIMARY KEY ("id")
-) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表';
-CREATE TABLE "t_customer_account" (
- "id" INT(11) NOT NULL AUTO_INCREMENT,
- "customer_id" INT(11) DEFAULT NULL COMMENT '客户id',
- "bank" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
- "account_no" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
- PRIMARY KEY("id")
-)
-ENGINE=InnoDB
-AUTO_INCREMENT=1
-DEFAULT CHARACTER SET=utf8
-COLLATE=utf8_bin
-COMMENT='客户账户表';
diff --git a/tests/test_build.py b/tests/test_build.py
index 18c0e47..b5d657c 100644
--- a/tests/test_build.py
+++ b/tests/test_build.py
@@ -270,7 +270,7 @@ class TestBuild(unittest.TestCase):
lambda: parse_one("SELECT * FROM y")
.assert_is(exp.Select)
.ctas("foo.x", properties={"format": "parquet", "y": "2"}),
- "CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y' = '2') AS SELECT * FROM y",
+ "CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y'='2') AS SELECT * FROM y",
"hive",
),
(lambda: and_("x=1", "y=1"), "x = 1 AND y = 1"),
@@ -308,6 +308,18 @@ class TestBuild(unittest.TestCase):
lambda: exp.subquery("select x from tbl UNION select x from bar", "unioned").select("x"),
"SELECT x FROM (SELECT x FROM tbl UNION SELECT x FROM bar) AS unioned",
),
+ (
+ lambda: exp.update("tbl", {"x": None, "y": {"x": 1}}),
+ "UPDATE tbl SET x = NULL, y = MAP('x', 1)",
+ ),
+ (
+ lambda: exp.update("tbl", {"x": 1}, where="y > 0"),
+ "UPDATE tbl SET x = 1 WHERE y > 0",
+ ),
+ (
+ lambda: exp.update("tbl", {"x": 1}, from_="tbl2"),
+ "UPDATE tbl SET x = 1 FROM tbl2",
+ ),
]:
with self.subTest(sql):
self.assertEqual(expression().sql(dialect[0] if dialect else None), sql)
diff --git a/tests/test_expressions.py b/tests/test_expressions.py
index 64ad02d..cc41307 100644
--- a/tests/test_expressions.py
+++ b/tests/test_expressions.py
@@ -27,6 +27,8 @@ class TestExpressions(unittest.TestCase):
parse_one("ROW() OVER (partition BY y)"),
)
self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)"))
+ self.assertEqual(exp.Table(pivots=[]), exp.Table())
+ self.assertNotEqual(exp.Table(pivots=[None]), exp.Table())
def test_find(self):
expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y")
@@ -280,6 +282,19 @@ class TestExpressions(unittest.TestCase):
expression.find(exp.Table).replace(parse_one("y"))
self.assertEqual(expression.sql(), "SELECT c, b FROM y")
+ def test_pop(self):
+ expression = parse_one("SELECT a, b FROM x")
+ expression.find(exp.Column).pop()
+ self.assertEqual(expression.sql(), "SELECT b FROM x")
+ expression.find(exp.Column).pop()
+ self.assertEqual(expression.sql(), "SELECT FROM x")
+ expression.pop()
+ self.assertEqual(expression.sql(), "SELECT FROM x")
+
+ expression = parse_one("WITH x AS (SELECT a FROM x) SELECT * FROM x")
+ expression.find(exp.With).pop()
+ self.assertEqual(expression.sql(), "SELECT * FROM x")
+
def test_walk(self):
expression = parse_one("SELECT * FROM (SELECT * FROM x)")
self.assertEqual(len(list(expression.walk())), 9)
@@ -316,6 +331,7 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("MAX(a)"), exp.Max)
self.assertIsInstance(parse_one("MIN(a)"), exp.Min)
self.assertIsInstance(parse_one("MONTH(a)"), exp.Month)
+ self.assertIsInstance(parse_one("POSITION(' ' IN a)"), exp.StrPosition)
self.assertIsInstance(parse_one("POW(a, 2)"), exp.Pow)
self.assertIsInstance(parse_one("POWER(a, 2)"), exp.Pow)
self.assertIsInstance(parse_one("QUANTILE(a, 0.90)"), exp.Quantile)
@@ -420,7 +436,7 @@ class TestExpressions(unittest.TestCase):
exp.Properties.from_dict(
{
"FORMAT": "parquet",
- "PARTITIONED_BY": [exp.to_identifier("a"), exp.to_identifier("b")],
+ "PARTITIONED_BY": (exp.to_identifier("a"), exp.to_identifier("b")),
"custom": 1,
"TABLE_FORMAT": exp.to_identifier("test_format"),
"ENGINE": None,
@@ -444,4 +460,17 @@ class TestExpressions(unittest.TestCase):
),
)
- self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": {"key": "value"}})
+ self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object})
+
+ def test_convert(self):
+ for value, expected in [
+ (1, "1"),
+ ("1", "'1'"),
+ (None, "NULL"),
+ (True, "TRUE"),
+ ((1, "2", None), "(1, '2', NULL)"),
+ ([1, "2", None], "ARRAY(1, '2', NULL)"),
+ ({"x": None}, "MAP('x', NULL)"),
+ ]:
+ with self.subTest(value):
+ self.assertEqual(exp.convert(value).sql(), expected)
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 102e141..8d4aecc 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1,9 +1,11 @@
import unittest
+from functools import partial
-from sqlglot import optimizer, parse_one, table
+from sqlglot import exp, optimizer, parse_one, table
from sqlglot.errors import OptimizeError
+from sqlglot.optimizer.annotate_types import annotate_types
from sqlglot.optimizer.schema import MappingSchema, ensure_schema
-from sqlglot.optimizer.scope import traverse_scope
+from sqlglot.optimizer.scope import build_scope, traverse_scope
from tests.helpers import TPCH_SCHEMA, load_sql_fixture_pairs, load_sql_fixtures
@@ -27,11 +29,17 @@ class TestOptimizer(unittest.TestCase):
}
def check_file(self, file, func, pretty=False, **kwargs):
- for meta, sql, expected in load_sql_fixture_pairs(f"optimizer/{file}.sql"):
+ for i, (meta, sql, expected) in enumerate(load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1):
dialect = meta.get("dialect")
- with self.subTest(sql):
+ leave_tables_isolated = meta.get("leave_tables_isolated")
+
+ func_kwargs = {**kwargs}
+ if leave_tables_isolated is not None:
+ func_kwargs["leave_tables_isolated"] = leave_tables_isolated.lower() in ("true", "1")
+
+ with self.subTest(f"{i}, {sql}"):
self.assertEqual(
- func(parse_one(sql, read=dialect), **kwargs).sql(pretty=pretty, dialect=dialect),
+ func(parse_one(sql, read=dialect), **func_kwargs).sql(pretty=pretty, dialect=dialect),
expected,
)
@@ -123,21 +131,20 @@ class TestOptimizer(unittest.TestCase):
optimizer.optimize_joins.optimize_joins,
)
- def test_eliminate_subqueries(self):
- self.check_file(
- "eliminate_subqueries",
- optimizer.eliminate_subqueries.eliminate_subqueries,
- pretty=True,
+ def test_merge_subqueries(self):
+ optimize = partial(
+ optimizer.optimize,
+ rules=[
+ optimizer.qualify_tables.qualify_tables,
+ optimizer.qualify_columns.qualify_columns,
+ optimizer.merge_subqueries.merge_subqueries,
+ ],
)
- def test_merge_derived_tables(self):
- def optimize(expression, **kwargs):
- expression = optimizer.qualify_tables.qualify_tables(expression)
- expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs)
- expression = optimizer.merge_derived_tables.merge_derived_tables(expression)
- return expression
+ self.check_file("merge_subqueries", optimize, schema=self.schema)
- self.check_file("merge_derived_tables", optimize, schema=self.schema)
+ def test_eliminate_subqueries(self):
+ self.check_file("eliminate_subqueries", optimizer.eliminate_subqueries.eliminate_subqueries)
def test_tpch(self):
self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True)
@@ -257,17 +264,73 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
ON s.b = r.b
WHERE s.b > (SELECT MAX(x.a) FROM x WHERE x.b = s.b)
"""
- scopes = traverse_scope(parse_one(sql))
- self.assertEqual(len(scopes), 5)
- self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x")
- self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y")
- self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b")
- self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y")
- self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql())
-
- self.assertEqual(set(scopes[4].sources), {"q", "r", "s"})
- self.assertEqual(len(scopes[4].columns), 6)
- self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"})
- self.assertEqual(scopes[4].source_columns("q"), [])
- self.assertEqual(len(scopes[4].source_columns("r")), 2)
- self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"})
+ for scopes in traverse_scope(parse_one(sql)), list(build_scope(parse_one(sql)).traverse()):
+ self.assertEqual(len(scopes), 5)
+ self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x")
+ self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y")
+ self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b")
+ self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y")
+ self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql())
+
+ self.assertEqual(set(scopes[4].sources), {"q", "r", "s"})
+ self.assertEqual(len(scopes[4].columns), 6)
+ self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"})
+ self.assertEqual(scopes[4].source_columns("q"), [])
+ self.assertEqual(len(scopes[4].source_columns("r")), 2)
+ self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"})
+
+ def test_literal_type_annotation(self):
+ tests = {
+ "SELECT 5": exp.DataType.Type.INT,
+ "SELECT 5.3": exp.DataType.Type.DOUBLE,
+ "SELECT 'bla'": exp.DataType.Type.VARCHAR,
+ "5": exp.DataType.Type.INT,
+ "5.3": exp.DataType.Type.DOUBLE,
+ "'bla'": exp.DataType.Type.VARCHAR,
+ }
+
+ for sql, target_type in tests.items():
+ expression = parse_one(sql)
+ annotated_expression = annotate_types(expression)
+
+ self.assertEqual(annotated_expression.find(exp.Literal).type, target_type)
+
+ def test_boolean_type_annotation(self):
+ tests = {
+ "SELECT TRUE": exp.DataType.Type.BOOLEAN,
+ "FALSE": exp.DataType.Type.BOOLEAN,
+ }
+
+ for sql, target_type in tests.items():
+ expression = parse_one(sql)
+ annotated_expression = annotate_types(expression)
+
+ self.assertEqual(annotated_expression.find(exp.Boolean).type, target_type)
+
+ def test_cast_type_annotation(self):
+ expression = parse_one("CAST('2020-01-01' AS TIMESTAMPTZ(9))")
+ annotate_types(expression)
+
+ self.assertEqual(expression.type, exp.DataType.Type.TIMESTAMPTZ)
+ self.assertEqual(expression.this.type, exp.DataType.Type.VARCHAR)
+ self.assertEqual(expression.args["to"].type, exp.DataType.Type.TIMESTAMPTZ)
+ self.assertEqual(expression.args["to"].expressions[0].type, exp.DataType.Type.INT)
+
+ def test_cache_annotation(self):
+ expression = parse_one("CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1")
+ annotated_expression = annotate_types(expression)
+
+ self.assertEqual(annotated_expression.expression.expressions[0].type, exp.DataType.Type.INT)
+
+ def test_binary_annotation(self):
+ expression = parse_one("SELECT 0.0 + (2 + 3)")
+ annotate_types(expression)
+
+ expression = expression.expressions[0]
+
+ self.assertEqual(expression.type, exp.DataType.Type.DOUBLE)
+ self.assertEqual(expression.left.type, exp.DataType.Type.DOUBLE)
+ self.assertEqual(expression.right.type, exp.DataType.Type.INT)
+ self.assertEqual(expression.right.this.type, exp.DataType.Type.INT)
+ self.assertEqual(expression.right.this.left.type, exp.DataType.Type.INT)
+ self.assertEqual(expression.right.this.right.type, exp.DataType.Type.INT)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 9e430e2..4c46531 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -21,6 +21,11 @@ class TestParser(unittest.TestCase):
self.assertIsNotNone(parse_one("date").find(exp.Column))
+ def test_float(self):
+ self.assertEqual(parse_one(".2"), parse_one("0.2"))
+ self.assertEqual(parse_one("int 1"), parse_one("CAST(1 AS INT)"))
+ self.assertEqual(parse_one("int.5"), parse_one("CAST(0.5 AS INT)"))
+
def test_table(self):
tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)]
self.assertEqual(tables, ["a", "b.c", "d"])
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 2030109..1928d2c 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -6,11 +6,32 @@ from sqlglot.transforms import unalias_group
class TestTime(unittest.TestCase):
def validate(self, transform, sql, target):
- self.assertEqual(parse_one(sql).transform(transform).sql(), target)
+ with self.subTest(sql):
+ self.assertEqual(parse_one(sql).transform(transform).sql(), target)
def test_unalias_group(self):
self.validate(
unalias_group,
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4",
- "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, 2, x.c, 4",
+ "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4",
+ )
+ self.validate(
+ unalias_group,
+ "SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), aliased_last",
+ "SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), 3",
+ )
+ self.validate(
+ unalias_group,
+ "SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))",
+ "SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))",
+ )
+ self.validate(
+ unalias_group,
+ "SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY new_date",
+ "SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY 1",
+ )
+ self.validate(
+ unalias_group,
+ "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date",
+ "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date",
)