author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-06-02 23:59:11 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-06-02 23:59:11 +0000
commit     caea5267cb8e1fea3702adbdf6f68fd37d13b3b7 (patch)
tree       f06f1da1ab3b6906beca1c3c7222d28ff00766ac /tests
parent     Adding upstream version 12.2.0. (diff)
download   sqlglot-caea5267cb8e1fea3702adbdf6f68fd37d13b3b7.tar.xz
           sqlglot-caea5267cb8e1fea3702adbdf6f68fd37d13b3b7.zip
Adding upstream version 15.0.0. (upstream/15.0.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests')
48 files changed, 1614 insertions, 570 deletions
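
For orientation: the changes below rewrite expected strings for sqlglot's dialect test helpers (validate_identity asserts a round-trip within one dialect; validate_all parses one input and checks each target dialect's rendering). A minimal sketch of what those helpers reduce to, using only sqlglot's public API, with inputs and expected outputs taken from the diffs that follow:

    import sqlglot
    from sqlglot import exp

    # validate_identity(sql): parsing and regenerating in the same dialect round-trips.
    sql = "SELECT * FROM my-project.mydataset.mytable"
    assert sqlglot.transpile(sql, read="bigquery", write="bigquery")[0] == sql

    # validate_all(sql, write={...}): parse once, assert each target rendering.
    # E.g. Hive integer division now renders as DuckDB's // operator (v0.8.0).
    assert sqlglot.transpile("x div y", read="hive", write="duckdb")[0] == "x // y"

    # AST-level assertions, as in the new DuckDB and ClickHouse tests.
    assert isinstance(sqlglot.parse_one("a // b", read="duckdb"), exp.IntDiv)
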
diff --git a/tests/dataframe/integration/test_dataframe.py b/tests/dataframe/integration/test_dataframe.py
index d00464b..702c6ee 100644
--- a/tests/dataframe/integration/test_dataframe.py
+++ b/tests/dataframe/integration/test_dataframe.py
@@ -1155,8 +1155,9 @@ class TestDataframeFunc(DataFrameValidator):
         df, dfs = self.compare_spark_with_sqlglot(df_joined, dfs_joined)
         self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(df))
         self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(dfs))
-
-        # TODO: Add test to make sure with and without alias are the same once ids are deterministic
+        self.assertEqual(
+            "'UnresolvedHint BROADCAST, ['a2]", self.get_explain_plan(dfs).split("\n")[1]
+        )
 
     def test_broadcast_func(self):
         df_joined = self.df_spark_employee.join(
@@ -1188,6 +1189,9 @@ class TestDataframeFunc(DataFrameValidator):
         df, dfs = self.compare_spark_with_sqlglot(df_joined, dfs_joined)
         self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(df))
         self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(dfs))
+        self.assertEqual(
+            "'UnresolvedHint BROADCAST, ['a2]", self.get_explain_plan(dfs).split("\n")[1]
+        )
 
     def test_repartition_by_num(self):
         """
diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py
index 7da0833..0970a2e 100644
--- a/tests/dataframe/unit/test_session.py
+++ b/tests/dataframe/unit/test_session.py
@@ -70,13 +70,12 @@ class TestDataframeSession(DataFrameSQLValidator):
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_select_only(self):
-        # TODO: Do exact matches once CTE names are deterministic
         query = "SELECT cola, colb FROM table"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
         df = self.spark.sql(query)
-        self.assertIn(
+        self.assertEqual(
             "SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`",
-            df.sql(pretty=False),
+            df.sql(pretty=False)[0],
         )
 
     @mock.patch("sqlglot.schema", MappingSchema())
@@ -90,14 +89,13 @@ class TestDataframeSession(DataFrameSQLValidator):
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_with_aggs(self):
-        # TODO: Do exact matches once CTE names are deterministic
         query = "SELECT cola, colb FROM table"
         sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
         df = self.spark.sql(query).groupBy(F.col("cola")).agg(F.sum("colb"))
-        result = df.sql(pretty=False, optimize=False)[0]
-        self.assertIn("SELECT cola, colb FROM table", result)
-        self.assertIn("SUM(colb)", result)
-        self.assertIn("GROUP BY cola", result)
+        self.assertEqual(
+            "WITH t38189 AS (SELECT cola, colb FROM table), t42330 AS (SELECT cola, colb FROM t38189) SELECT cola, SUM(colb) FROM t42330 GROUP BY cola",
+            df.sql(pretty=False, optimize=False)[0],
+        )
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_create(self):
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index 87bba6f..99d8a3c 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -6,6 +6,14 @@ class TestBigQuery(Validator):
     dialect = "bigquery"
 
     def test_bigquery(self):
+        self.validate_identity("SAFE_CAST(x AS STRING)")
+        self.validate_identity("SELECT * FROM a-b-c.mydataset.mytable")
+        self.validate_identity("SELECT * FROM abc-def-ghi")
+        self.validate_identity("SELECT * FROM a-b-c")
+        self.validate_identity("SELECT * FROM my-table")
+        self.validate_identity("SELECT * FROM my-project.mydataset.mytable")
+        self.validate_identity("SELECT * FROM pro-ject_id.c.d CROSS JOIN foo-bar")
+        self.validate_identity("x <> ''")
         self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
         self.validate_identity("SELECT b'abc'")
         self.validate_identity("""SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[1, 2])""")
@@ -13,6 +21,7 @@ class TestBigQuery(Validator):
         self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
         self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
         self.validate_identity("SELECT STRUCT<ARRAY<STRING>>(['2023-01-17'])")
+        self.validate_identity("SELECT STRUCT<STRING>((SELECT a FROM b.c LIMIT 1)).*")
         self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
         self.validate_identity("""CREATE TABLE x (a STRUCT<values ARRAY<INT64>>)""")
         self.validate_identity("""CREATE TABLE x (a STRUCT<b STRING OPTIONS (description='b')>)""")
@@ -22,17 +31,34 @@ class TestBigQuery(Validator):
         self.validate_identity(
             "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))"
         )
+        self.validate_identity(
+            "CREATE TABLE IF NOT EXISTS foo AS SELECT * FROM bla EXCEPT DISTINCT (SELECT * FROM bar) LIMIT 0"
+        )
+        self.validate_all('x <> ""', write={"bigquery": "x <> ''"})
+        self.validate_all('x <> """"""', write={"bigquery": "x <> ''"})
+        self.validate_all("x <> ''''''", write={"bigquery": "x <> ''"})
         self.validate_all(
             "CREATE TEMP TABLE foo AS SELECT 1",
             write={"bigquery": "CREATE TEMPORARY TABLE foo AS SELECT 1"},
         )
+        self.validate_all(
+            "SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`",
+            write={
+                "bigquery": "SELECT * FROM SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW",
+            },
+        )
+        self.validate_all(
+            "SELECT * FROM `my-project.my-dataset.my-table`",
+            write={"bigquery": "SELECT * FROM `my-project`.`my-dataset`.`my-table`"},
+        )
         self.validate_all("LEAST(x, y)", read={"sqlite": "MIN(x, y)"})
         self.validate_all("CAST(x AS CHAR)", write={"bigquery": "CAST(x AS STRING)"})
         self.validate_all("CAST(x AS NCHAR)", write={"bigquery": "CAST(x AS STRING)"})
         self.validate_all("CAST(x AS NVARCHAR)", write={"bigquery": "CAST(x AS STRING)"})
         self.validate_all("CAST(x AS TIMESTAMP)", write={"bigquery": "CAST(x AS DATETIME)"})
         self.validate_all("CAST(x AS TIMESTAMPTZ)", write={"bigquery": "CAST(x AS TIMESTAMP)"})
+        self.validate_all("CAST(x AS RECORD)", write={"bigquery": "CAST(x AS STRUCT)"})
         self.validate_all(
             "SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)",
             write={
@@ -64,16 +90,6 @@ class TestBigQuery(Validator):
                 "spark": "'x\\''",
             },
         )
-        self.validate_all(
-            r'r"""/\*.*\*/"""',
-            write={
-                "bigquery": r"'/\*.*\*/'",
-                "duckdb": r"'/\*.*\*/'",
-                "presto": r"'/\*.*\*/'",
-                "hive": r"'/\*.*\*/'",
-                "spark": r"'/\*.*\*/'",
-            },
-        )
 
         with self.assertRaises(ValueError):
             transpile("'\\'", read="bigquery")
@@ -87,13 +103,23 @@ class TestBigQuery(Validator):
             },
         )
         self.validate_all(
+            r'r"""/\*.*\*/"""',
+            write={
+                "bigquery": r"r'/\*.*\*/'",
+                "duckdb": r"'/\\*.*\\*/'",
+                "presto": r"'/\\*.*\\*/'",
+                "hive": r"'/\\*.*\\*/'",
+                "spark": r"'/\\*.*\\*/'",
+            },
+        )
+        self.validate_all(
             r'R"""/\*.*\*/"""',
             write={
-                "bigquery": r"'/\*.*\*/'",
-                "duckdb": r"'/\*.*\*/'",
-                "presto": r"'/\*.*\*/'",
-                "hive": r"'/\*.*\*/'",
-                "spark": r"'/\*.*\*/'",
+                "bigquery": r"r'/\*.*\*/'",
+                "duckdb": r"'/\\*.*\\*/'",
+                "presto": r"'/\\*.*\\*/'",
+                "hive": r"'/\\*.*\\*/'",
+                "spark": r"'/\\*.*\\*/'",
             },
         )
         self.validate_all(
@@ -234,7 +260,7 @@ class TestBigQuery(Validator):
             "DIV(x, y)",
             write={
                 "bigquery": "DIV(x, y)",
-                "duckdb": "CAST(x / y AS INT)",
+                "duckdb": "x // y",
             },
         )
@@ -308,7 +334,7 @@ class TestBigQuery(Validator):
         self.validate_all(
             "DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)",
             write={
-                "postgres": "CURRENT_DATE - INTERVAL '1' DAY",
+                "postgres": "CURRENT_DATE - INTERVAL '1 DAY'",
                 "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)",
             },
         )
@@ -318,7 +344,7 @@ class TestBigQuery(Validator):
                 "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
                 "duckdb": "CURRENT_DATE + INTERVAL 1 DAY",
                 "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
-                "postgres": "CURRENT_DATE + INTERVAL '1' DAY",
+                "postgres": "CURRENT_DATE + INTERVAL '1 DAY'",
                 "presto": "DATE_ADD('DAY', 1, CURRENT_DATE)",
                 "hive": "DATE_ADD(CURRENT_DATE, 1)",
                 "spark": "DATE_ADD(CURRENT_DATE, 1)",
@@ -470,3 +496,12 @@ class TestBigQuery(Validator):
                 "snowflake": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED AND product LIKE '%b%' THEN DELETE",
             },
         )
+
+    def test_rename_table(self):
+        self.validate_all(
+            "ALTER TABLE db.t1 RENAME TO db.t2",
+            write={
+                "snowflake": "ALTER TABLE db.t1 RENAME TO db.t2",
+                "bigquery": "ALTER TABLE db.t1 RENAME TO t2",
+            },
+        )
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index 1060881..b6a7765 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -1,3 +1,4 @@
+from sqlglot import exp, parse_one
 from tests.dialects.test_dialect import Validator
 
@@ -5,6 +6,9 @@ class TestClickhouse(Validator):
     dialect = "clickhouse"
 
     def test_clickhouse(self):
+        self.validate_identity("ATTACH DATABASE DEFAULT ENGINE = ORDINARY")
+        self.validate_identity("CAST(['hello'], 'Array(Enum8(''hello'' = 1))')")
+        self.validate_identity("SELECT x, COUNT() FROM y GROUP BY x WITH TOTALS")
         self.validate_identity("SELECT INTERVAL t.days day")
         self.validate_identity("SELECT match('abc', '([a-z]+)')")
         self.validate_identity("dictGet(x, 'y')")
@@ -16,17 +20,36 @@ class TestClickhouse(Validator):
         self.validate_identity("SELECT * FROM foo LEFT ASOF JOIN bla")
         self.validate_identity("SELECT * FROM foo ASOF JOIN bla")
         self.validate_identity("SELECT * FROM foo ANY JOIN bla")
+        self.validate_identity("SELECT * FROM foo GLOBAL ANY JOIN bla")
+        self.validate_identity("SELECT * FROM foo GLOBAL LEFT ANY JOIN bla")
         self.validate_identity("SELECT quantile(0.5)(a)")
         self.validate_identity("SELECT quantiles(0.5)(a) AS x FROM t")
         self.validate_identity("SELECT quantiles(0.1, 0.2, 0.3)(a)")
+        self.validate_identity("SELECT quantileTiming(0.5)(RANGE(100))")
         self.validate_identity("SELECT histogram(5)(a)")
         self.validate_identity("SELECT groupUniqArray(2)(a)")
         self.validate_identity("SELECT exponentialTimeDecayedAvg(60)(a, b)")
         self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)")
         self.validate_identity("position(haystack, needle)")
         self.validate_identity("position(haystack, needle, position)")
+        self.validate_identity("CAST(x AS DATETIME)")
+        self.validate_identity(
+            'SELECT CAST(tuple(1 AS "a", 2 AS "b", 3.0 AS "c").2 AS Nullable(TEXT))'
+        )
+        self.validate_identity(
+            "CREATE TABLE test (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()"
+        )
 
         self.validate_all(
+            "SELECT uniq(x) FROM (SELECT any(y) AS x FROM (SELECT 1 AS y))",
+            read={
+                "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) x FROM (SELECT 1 y))",
+            },
+            write={
+                "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) AS x FROM (SELECT 1 AS y))",
+            },
+        )
+        self.validate_all(
             "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
             write={
                 "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
@@ -41,9 +64,7 @@ class TestClickhouse(Validator):
         )
         self.validate_all(
             "CAST(1 AS Nullable(DateTime64(6, 'UTC')))",
-            write={
-                "clickhouse": "CAST(1 AS Nullable(DateTime64(6, 'UTC')))",
-            },
+            write={"clickhouse": "CAST(1 AS Nullable(DateTime64(6, 'UTC')))"},
         )
         self.validate_all(
             "SELECT x #! comment",
@@ -59,6 +80,40 @@ class TestClickhouse(Validator):
             "SELECT position(needle IN haystack)",
             write={"clickhouse": "SELECT position(haystack, needle)"},
         )
+        self.validate_identity(
+            "SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result = 'break'"
+        )
+        self.validate_identity("SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result_")
+        self.validate_identity("SELECT * FROM x FORMAT PrettyCompact")
+        self.validate_identity(
+            "SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result_ FORMAT PrettyCompact"
+        )
+        self.validate_all(
+            "SELECT * FROM foo JOIN bar USING id, name",
+            write={"clickhouse": "SELECT * FROM foo JOIN bar USING (id, name)"},
+        )
+        self.validate_all(
+            "SELECT * FROM foo ANY LEFT JOIN bla ON foo.c1 = bla.c2",
+            write={"clickhouse": "SELECT * FROM foo LEFT ANY JOIN bla ON foo.c1 = bla.c2"},
+        )
+        self.validate_all(
+            "SELECT * FROM foo GLOBAL ANY LEFT JOIN bla ON foo.c1 = bla.c2",
+            write={"clickhouse": "SELECT * FROM foo GLOBAL LEFT ANY JOIN bla ON foo.c1 = bla.c2"},
+        )
+        self.validate_all(
+            """
+            SELECT
+                loyalty,
+                count()
+            FROM hits SEMI LEFT JOIN users USING (UserID)
+            GROUP BY loyalty
+            ORDER BY loyalty ASC
+            """,
+            write={
+                "clickhouse": "SELECT loyalty, COUNT() FROM hits LEFT SEMI JOIN users USING (UserID)"
+                + " GROUP BY loyalty ORDER BY loyalty"
+            },
+        )
 
     def test_cte(self):
         self.validate_identity("WITH 'x' AS foo SELECT foo")
@@ -66,6 +121,57 @@ class TestClickhouse(Validator):
         self.validate_identity("WITH (SELECT foo) AS bar SELECT bar + 5")
         self.validate_identity("WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT * FROM test1")
 
+    def test_ternary(self):
+        self.validate_all("x ? 1 : 2", write={"clickhouse": "CASE WHEN x THEN 1 ELSE 2 END"})
+        self.validate_all(
+            "IF(BAR(col), sign > 0 ? FOO() : 0, 1)",
+            write={
+                "clickhouse": "CASE WHEN BAR(col) THEN CASE WHEN sign > 0 THEN FOO() ELSE 0 END ELSE 1 END"
+            },
+        )
+        self.validate_all(
+            "x AND FOO() > 3 + 2 ? 1 : 2",
+            write={"clickhouse": "CASE WHEN x AND FOO() > 3 + 2 THEN 1 ELSE 2 END"},
+        )
+        self.validate_all(
+            "x ? (y ? 1 : 2) : 3",
+            write={"clickhouse": "CASE WHEN x THEN (CASE WHEN y THEN 1 ELSE 2 END) ELSE 3 END"},
+        )
+        self.validate_all(
+            "x AND (foo() ? FALSE : TRUE) ? (y ? 1 : 2) : 3",
+            write={
+                "clickhouse": "CASE WHEN x AND (CASE WHEN foo() THEN FALSE ELSE TRUE END) THEN (CASE WHEN y THEN 1 ELSE 2 END) ELSE 3 END"
+            },
+        )
+
+        ternary = parse_one("x ? (y ? 1 : 2) : 3", read="clickhouse")
+
+        self.assertIsInstance(ternary, exp.If)
+        self.assertIsInstance(ternary.this, exp.Column)
+        self.assertIsInstance(ternary.args["true"], exp.Paren)
+        self.assertIsInstance(ternary.args["false"], exp.Literal)
+
+        nested_ternary = ternary.args["true"].this
+
+        self.assertIsInstance(nested_ternary.this, exp.Column)
+        self.assertIsInstance(nested_ternary.args["true"], exp.Literal)
+        self.assertIsInstance(nested_ternary.args["false"], exp.Literal)
+
1 : 2", read="clickhouse").assert_is(exp.If).this.assert_is(exp.And) + + def test_parameterization(self): + self.validate_all( + "SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", + write={ + "clickhouse": "SELECT {abc: UInt32}, {b: TEXT}, {c: DATETIME}, {d: Map(TEXT, Array(UInt8))}, {e: Tuple(UInt8, String)}", + "": "SELECT :abc, :b, :c, :d, :e", + }, + ) + self.validate_all( + "SELECT * FROM {table: Identifier}", + write={"clickhouse": "SELECT * FROM {table: Identifier}"}, + ) + def test_signed_and_unsigned_types(self): data_types = [ "UInt8", @@ -86,3 +192,206 @@ class TestClickhouse(Validator): f"pow(2, 32)::{data_type}", write={"clickhouse": f"CAST(POWER(2, 32) AS {data_type})"}, ) + + def test_ddl(self): + self.validate_identity( + "CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'" + ) + + self.validate_all( + """ + CREATE TABLE example1 ( + timestamp DateTime, + x UInt32 TTL now() + INTERVAL 1 MONTH, + y String TTL timestamp + INTERVAL 1 DAY, + z String + ) + ENGINE = MergeTree + ORDER BY tuple() + """, + write={ + "clickhouse": """CREATE TABLE example1 ( + timestamp DATETIME, + x UInt32 TTL now() + INTERVAL '1' MONTH, + y TEXT TTL timestamp + INTERVAL '1' DAY, + z TEXT +) +ENGINE=MergeTree +ORDER BY tuple()""", + }, + pretty=True, + ) + self.validate_all( + """ + CREATE TABLE test (id UInt64, timestamp DateTime64, data String, max_hits UInt64, sum_hits UInt64) ENGINE = MergeTree + PRIMARY KEY (id, toStartOfDay(timestamp), timestamp) + TTL timestamp + INTERVAL 1 DAY + GROUP BY id, toStartOfDay(timestamp) + SET + max_hits = max(max_hits), + sum_hits = sum(sum_hits) + """, + write={ + "clickhouse": """CREATE TABLE test ( + id UInt64, + timestamp DateTime64, + data TEXT, + max_hits UInt64, + sum_hits UInt64 +) +ENGINE=MergeTree +PRIMARY KEY (id, toStartOfDay(timestamp), timestamp) +TTL + timestamp + INTERVAL '1' DAY +GROUP BY + id, + toStartOfDay(timestamp) +SET + max_hits = MAX(max_hits), + sum_hits = SUM(sum_hits)""", + }, + pretty=True, + ) + self.validate_all( + """ + CREATE TABLE test (id String, data String) ENGINE = AggregatingMergeTree() + ORDER BY tuple() + SETTINGS + max_suspicious_broken_parts=500, + parts_to_throw_insert=100 + """, + write={ + "clickhouse": """CREATE TABLE test ( + id TEXT, + data TEXT +) +ENGINE=AggregatingMergeTree() +ORDER BY tuple() +SETTINGS + max_suspicious_broken_parts = 500, + parts_to_throw_insert = 100""", + }, + pretty=True, + ) + self.validate_all( + """ + CREATE TABLE example_table + ( + d DateTime, + a Int + ) + ENGINE = MergeTree + PARTITION BY toYYYYMM(d) + ORDER BY d + TTL d + INTERVAL 1 MONTH DELETE, + d + INTERVAL 1 WEEK TO VOLUME 'aaa', + d + INTERVAL 2 WEEK TO DISK 'bbb'; + """, + write={ + "clickhouse": """CREATE TABLE example_table ( + d DATETIME, + a Int32 +) +ENGINE=MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL + d + INTERVAL '1' MONTH DELETE, + d + INTERVAL '1' WEEK TO VOLUME 'aaa', + d + INTERVAL '2' WEEK TO DISK 'bbb'""", + }, + pretty=True, + ) + self.validate_all( + """ + CREATE TABLE table_with_where + ( + d DateTime, + a Int + ) + ENGINE = MergeTree + PARTITION BY toYYYYMM(d) + ORDER BY d + TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; + """, + write={ + "clickhouse": """CREATE TABLE table_with_where ( + d DATETIME, + a Int32 +) +ENGINE=MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL + d + INTERVAL '1' MONTH DELETE +WHERE + toDayOfWeek(d) = 1""", + }, + pretty=True, + ) + self.validate_all( + """ + 
CREATE TABLE table_for_recompression + ( + d DateTime, + key UInt64, + value String + ) ENGINE MergeTree() + ORDER BY tuple() + PARTITION BY key + TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; + """, + write={ + "clickhouse": """CREATE TABLE table_for_recompression ( + d DATETIME, + key UInt64, + value TEXT +) +ENGINE=MergeTree() +ORDER BY tuple() +PARTITION BY key +TTL + d + INTERVAL '1' MONTH RECOMPRESS CODEC(ZSTD(17)), + d + INTERVAL '1' YEAR RECOMPRESS CODEC(LZ4HC(10)) +SETTINGS + min_rows_for_wide_part = 0, + min_bytes_for_wide_part = 0""", + }, + pretty=True, + ) + self.validate_all( + """ + CREATE TABLE table_for_aggregation + ( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int + ) + ENGINE = MergeTree + ORDER BY (k1, k2) + TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); + """, + write={ + "clickhouse": """CREATE TABLE table_for_aggregation ( + d DATETIME, + k1 Int32, + k2 Int32, + x Int32, + y Int32 +) +ENGINE=MergeTree +ORDER BY (k1, k2) +TTL + d + INTERVAL '1' MONTH +GROUP BY + k1, + k2 +SET + x = MAX(x), + y = MIN(y)""", + }, + pretty=True, + ) diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py index 4619108..8239dec 100644 --- a/tests/dialects/test_databricks.py +++ b/tests/dialects/test_databricks.py @@ -9,6 +9,14 @@ class TestDatabricks(Validator): self.validate_identity("CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x + 1") self.validate_identity("CREATE FUNCTION a AS b") self.validate_identity("SELECT ${x} FROM ${y} WHERE ${z} > 1") + self.validate_identity("CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))") + + self.validate_all( + "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))", + write={ + "databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y))))", + }, + ) # https://docs.databricks.com/sql/language-manual/functions/colonsign.html def test_json(self): diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index f12273b..e144e81 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -574,7 +574,7 @@ class TestDialect(Validator): "duckdb": "x + INTERVAL 1 DAY", "hive": "DATE_ADD(x, 1)", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", - "postgres": "x + INTERVAL '1' DAY", + "postgres": "x + INTERVAL '1 DAY'", "presto": "DATE_ADD('DAY', 1, x)", "snowflake": "DATEADD(DAY, 1, x)", "spark": "DATE_ADD(x, 1)", @@ -869,7 +869,7 @@ class TestDialect(Validator): "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", - "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", "oracle": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", @@ -1354,7 +1354,7 @@ class TestDialect(Validator): }, write={ "hive": "CREATE INDEX my_idx ON TABLE tbl (a, b)", - "postgres": "CREATE INDEX my_idx ON tbl (a, b)", + "postgres": "CREATE INDEX my_idx ON tbl (a NULLS FIRST, b NULLS 
FIRST)", "sqlite": "CREATE INDEX my_idx ON tbl (a, b)", }, ) @@ -1366,7 +1366,7 @@ class TestDialect(Validator): }, write={ "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl (a, b)", - "postgres": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", + "postgres": "CREATE UNIQUE INDEX my_idx ON tbl (a NULLS FIRST, b NULLS FIRST)", "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", }, ) @@ -1415,12 +1415,12 @@ class TestDialect(Validator): }, ) self.validate_all( - "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", + "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", write={ - "hive": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", - "oracle": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 t JOIN cte2 WHERE cte1.a = cte2.c", - "postgres": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", - "sqlite": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c", + "hive": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", + "oracle": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 t CROSS JOIN cte2 WHERE cte1.a = cte2.c", + "postgres": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", + "sqlite": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", }, ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 8c1b748..ce6b122 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -1,4 +1,4 @@ -from sqlglot import ErrorLevel, UnsupportedError, transpile +from sqlglot import ErrorLevel, UnsupportedError, exp, parse_one, transpile from tests.dialects.test_dialect import Validator @@ -124,6 +124,20 @@ class TestDuckDB(Validator): ) def test_duckdb(self): + # https://github.com/duckdb/duckdb/releases/tag/v0.8.0 + self.assertEqual( + parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b" + ) + self.assertEqual( + parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" + ) + + self.validate_identity("PIVOT Cities ON Year USING SUM(Population)") + self.validate_identity("PIVOT Cities ON Year USING FIRST(Population)") + self.validate_identity("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country") + self.validate_identity("PIVOT Cities ON Country, Name USING SUM(Population)") + self.validate_identity("PIVOT Cities ON Country || '_' || Name USING SUM(Population)") + self.validate_identity("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country, Name") self.validate_identity("SELECT {'a': 1} AS x") self.validate_identity("SELECT {'a': {'b': {'c': 1}}, 'd': {'e': 2}} AS x") self.validate_identity("SELECT {'x': 1, 'y': 2, 'z': 3}") @@ -138,9 +152,36 @@ class TestDuckDB(Validator): 
self.validate_identity( "SELECT a['x space'] FROM (SELECT {'x space': 1, 'y': 2, 'z': 3} AS a)" ) + self.validate_identity( + "PIVOT Cities ON Year IN (2000, 2010) USING SUM(Population) GROUP BY Country" + ) + self.validate_identity( + "PIVOT Cities ON Year USING SUM(Population) AS total, MAX(Population) AS max GROUP BY Country" + ) + self.validate_identity( + "WITH pivot_alias AS (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) SELECT * FROM pivot_alias" + ) + self.validate_identity( + "SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias" + ) + self.validate_all("FROM (FROM tbl)", write={"duckdb": "SELECT * FROM (SELECT * FROM tbl)"}) + self.validate_all("FROM tbl", write={"duckdb": "SELECT * FROM tbl"}) self.validate_all("0b1010", write={"": "0 AS b1010"}) self.validate_all("0x1010", write={"": "0 AS x1010"}) + self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"}) + self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'}) + self.validate_all( + "PIVOT_WIDER Cities ON Year USING SUM(Population)", + write={"duckdb": "PIVOT Cities ON Year USING SUM(Population)"}, + ) + self.validate_all( + "WITH t AS (SELECT 1) FROM t", write={"duckdb": "WITH t AS (SELECT 1) SELECT * FROM t"} + ) + self.validate_all( + "WITH t AS (SELECT 1) SELECT * FROM (FROM t)", + write={"duckdb": "WITH t AS (SELECT 1) SELECT * FROM (SELECT * FROM t)"}, + ) self.validate_all( """SELECT DATEDIFF('day', t1."A", t1."B") FROM "table" AS t1""", write={ @@ -155,8 +196,6 @@ class TestDuckDB(Validator): "trino": "SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", }, ) - self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"}) - self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'}) self.validate_all( "WITH 'x' AS (SELECT 1) SELECT * FROM x", write={"duckdb": 'WITH "x" AS (SELECT 1) SELECT * FROM x'}, @@ -341,7 +380,8 @@ class TestDuckDB(Validator): self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ - "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", }, ) self.validate_all( @@ -370,13 +410,14 @@ class TestDuckDB(Validator): "hive": "SELECT DATE_ADD(TO_DATE(x), 1)", }, ) - - with self.assertRaises(UnsupportedError): - transpile( - "SELECT a FROM b PIVOT(SUM(x) FOR y IN ('z', 'q'))", - read="duckdb", - unsupported_level=ErrorLevel.IMMEDIATE, - ) + self.validate_all( + "SELECT CAST('2020-05-06' AS DATE) - INTERVAL 5 DAY", + read={"bigquery": "SELECT DATE_SUB(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"}, + ) + self.validate_all( + "SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY", + read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"}, + ) with self.assertRaises(UnsupportedError): transpile( @@ -481,3 +522,12 @@ class TestDuckDB(Validator): "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a", write={"duckdb": "SELECT a, BOOL_OR(b) FROM table GROUP BY a"}, ) + + def test_rename_table(self): + self.validate_all( + "ALTER TABLE db.t1 RENAME TO db.t2", + write={ + "snowflake": "ALTER TABLE db.t1 RENAME TO db.t2", + "duckdb": "ALTER TABLE db.t1 RENAME TO t2", + }, + ) diff --git a/tests/dialects/test_hive.py 
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index c69368c..99b5602 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -4,6 +4,17 @@ from tests.dialects.test_dialect import Validator
 class TestHive(Validator):
     dialect = "hive"
 
+    def test_hive(self):
+        self.validate_identity("SELECT * FROM test DISTRIBUTE BY y SORT BY x DESC ORDER BY l")
+        self.validate_identity(
+            "SELECT * FROM test WHERE RAND() <= 0.1 DISTRIBUTE BY RAND() SORT BY RAND()"
+        )
+        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z")
+        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x")
+        self.validate_identity("(SELECT 1 UNION SELECT 2) CLUSTER BY y DESC")
+        self.validate_identity("SELECT * FROM test CLUSTER BY y")
+        self.validate_identity("(SELECT 1 UNION SELECT 2) SORT BY z")
+
     def test_bits(self):
         self.validate_all(
             "x & 1",
@@ -362,7 +373,7 @@ class TestHive(Validator):
         self.validate_all(
             "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
             write={
-                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
+                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
                 "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
                 "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
                 "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
@@ -512,10 +523,10 @@ class TestHive(Validator):
             },
         )
         self.validate_all(
-            "SELECT * FROM x TABLESAMPLE(10 PERCENT) y",
+            "SELECT * FROM x.z TABLESAMPLE(10 PERCENT) y",
             write={
-                "hive": "SELECT * FROM x TABLESAMPLE (10 PERCENT) AS y",
-                "spark": "SELECT * FROM x TABLESAMPLE (10 PERCENT) AS y",
+                "hive": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
+                "spark": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
             },
         )
         self.validate_all(
@@ -549,6 +560,12 @@ class TestHive(Validator):
             },
         )
         self.validate_all(
+            "STRUCT(a = b, c = d)",
+            read={
+                "snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
+            },
+        )
+        self.validate_all(
             "MAP(a, b, c, d)",
             read={
                 "": "VAR_MAP(a, b, c, d)",
@@ -557,7 +574,6 @@ class TestHive(Validator):
                 "hive": "MAP(a, b, c, d)",
                 "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
                 "spark": "MAP(a, b, c, d)",
-                "snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
             },
             write={
                 "": "MAP(ARRAY(a, c), ARRAY(b, d))",
@@ -627,7 +643,7 @@ class TestHive(Validator):
         self.validate_all(
             "x div y",
             write={
-                "duckdb": "CAST(x / y AS INT)",
+                "duckdb": "x // y",
                 "presto": "CAST(x / y AS INTEGER)",
                 "hive": "CAST(x / y AS INT)",
                 "spark": "CAST(x / y AS INT)",
diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py
index f31b1b9..a80153b 100644
--- a/tests/dialects/test_mysql.py
+++ b/tests/dialects/test_mysql.py
@@ -6,6 +6,10 @@ class TestMySQL(Validator):
     dialect = "mysql"
 
     def test_ddl(self):
+        self.validate_identity(
+            "INSERT INTO x VALUES (1, 'a', 2.0) ON DUPLICATE KEY UPDATE SET x.id = 1"
+        )
+
         self.validate_all(
             "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'",
             write={
@@ -21,10 +25,6 @@ class TestMySQL(Validator):
                 "mysql": "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP() ON UPDATE CURRENT_TIMESTAMP()) DEFAULT CHARACTER SET=utf8 ROW_FORMAT=DYNAMIC",
             },
         )
-        self.validate_identity(
-            "INSERT INTO x VALUES (1, 'a', 2.0) ON DUPLICATE KEY UPDATE SET x.id = 1"
-        )
-
         self.validate_all(
             "CREATE TABLE x (id int not null auto_increment, primary key (id))",
             write={
@@ -37,6 +37,12 @@ class TestMySQL(Validator):
                 "sqlite": "CREATE TABLE x (id INTEGER NOT NULL)",
             },
         )
+        self.validate_all(
+            "CREATE TABLE `foo` (`id` char(36) NOT NULL DEFAULT (uuid()), PRIMARY KEY (`id`), UNIQUE KEY `id` (`id`))",
+            write={
+                "mysql": "CREATE TABLE `foo` (`id` CHAR(36) NOT NULL DEFAULT (UUID()), PRIMARY KEY (`id`), UNIQUE `id` (`id`))",
+            },
+        )
 
     def test_identity(self):
         self.validate_identity("SELECT CURRENT_TIMESTAMP(6)")
@@ -47,8 +53,11 @@ class TestMySQL(Validator):
         self.validate_identity("SELECT TRIM(BOTH 'bla' FROM ' XXX ')")
         self.validate_identity("SELECT TRIM('bla' FROM ' XXX ')")
         self.validate_identity("@@GLOBAL.max_connections")
-        self.validate_identity("CREATE TABLE A LIKE B")
+        self.validate_identity("SELECT * FROM t1, t2 FOR SHARE OF t1, t2 SKIP LOCKED")
+        self.validate_identity(
+            "SELECT * FROM t1, t2, t3 FOR SHARE OF t1 NOWAIT FOR UPDATE OF t2, t3 SKIP LOCKED"
+        )
 
         # SET Commands
         self.validate_identity("SET @var_name = expr")
@@ -369,6 +378,9 @@ class TestMySQL(Validator):
 
     def test_mysql(self):
         self.validate_all(
+            "SELECT * FROM t LOCK IN SHARE MODE", write={"mysql": "SELECT * FROM t FOR SHARE"}
+        )
+        self.validate_all(
             "SELECT DATE(DATE_SUB(`dt`, INTERVAL DAYOFMONTH(`dt`) - 1 DAY)) AS __timestamp FROM tableT",
             write={
                 "mysql": "SELECT DATE(DATE_SUB(`dt`, INTERVAL (DAYOFMONTH(`dt`) - 1) DAY)) AS __timestamp FROM tableT",
diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py
index 88c79fd..12ff699 100644
--- a/tests/dialects/test_oracle.py
+++ b/tests/dialects/test_oracle.py
@@ -5,6 +5,17 @@ class TestOracle(Validator):
     dialect = "oracle"
 
     def test_oracle(self):
+        self.validate_identity("SELECT * FROM t FOR UPDATE")
+        self.validate_identity("SELECT * FROM t FOR UPDATE WAIT 5")
+        self.validate_identity("SELECT * FROM t FOR UPDATE NOWAIT")
+        self.validate_identity("SELECT * FROM t FOR UPDATE SKIP LOCKED")
+        self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v")
+        self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v NOWAIT")
+        self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v SKIP LOCKED")
+        self.validate_identity("SELECT STANDARD_HASH('hello')")
+        self.validate_identity("SELECT STANDARD_HASH('hello', 'MD5')")
+        self.validate_identity("SELECT CAST(NULL AS VARCHAR2(2328 CHAR)) AS COL1")
+        self.validate_identity("SELECT CAST(NULL AS VARCHAR2(2328 BYTE)) AS COL1")
         self.validate_identity("SELECT * FROM table_name@dblink_name.database_link_domain")
         self.validate_identity("SELECT * FROM table_name SAMPLE (25) s")
         self.validate_identity("SELECT * FROM V$SESSION")
diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py
index b535a84..1f288c6 100644
--- a/tests/dialects/test_postgres.py
+++ b/tests/dialects/test_postgres.py
@@ -85,6 +85,9 @@ class TestPostgres(Validator):
         )
 
     def test_postgres(self):
+        self.validate_identity(
+            """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 day' PRECEDING AND '1 month' FOLLOWING)"""
+        )
         self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]")
         self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]")
         self.validate_identity("SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]")
@@ -158,7 +161,7 @@ class TestPostgres(Validator):
             write={
                 "postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
                 "redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
04:05:06.789' AS TIMESTAMPNTZ))", + "snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMPNTZ))", }, ) self.validate_all( @@ -166,7 +169,7 @@ class TestPostgres(Validator): write={ "postgres": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "redshift": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", - "snowflake": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", + "snowflake": "SELECT DATE_PART(month, CAST('20220502' AS DATE))", }, ) self.validate_all( @@ -193,7 +196,7 @@ class TestPostgres(Validator): self.validate_all( "GENERATE_SERIES(a, b, ' 2 days ')", write={ - "postgres": "GENERATE_SERIES(a, b, INTERVAL '2' day)", + "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 days')", "presto": "SEQUENCE(a, b, INTERVAL '2' day)", "trino": "SEQUENCE(a, b, INTERVAL '2' day)", }, @@ -201,7 +204,7 @@ class TestPostgres(Validator): self.validate_all( "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')", write={ - "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1' day)", + "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 day')", "presto": "SEQUENCE(TRY_CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)", "trino": "SEQUENCE(TRY_CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)", }, @@ -234,6 +237,15 @@ class TestPostgres(Validator): }, ) self.validate_all( + "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", + write={ + "postgres": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", + "presto": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4)) AS _u(s)", + "trino": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4)) AS _u(s)", + "tsql": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", + }, + ) + self.validate_all( "END WORK AND NO CHAIN", write={"postgres": "COMMIT AND NO CHAIN"}, ) diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 15962cc..1f5953c 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -438,6 +438,36 @@ class TestPresto(Validator): self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' week"}) self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' WEEKS"}) self.validate_all( + "WITH RECURSIVE t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t", + read={ + "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t", + }, + ) + self.validate_all( + "WITH RECURSIVE t(n, k) AS (SELECT 1 AS n, 2 AS k) SELECT SUM(n) FROM t", + read={ + "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, 2 as k) SELECT SUM(n) FROM t", + }, + ) + self.validate_all( + "WITH RECURSIVE t1(n) AS (SELECT 1 AS n), t2(n) AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2", + read={ + "postgres": "WITH RECURSIVE t1 AS (SELECT 1 AS n), t2 AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2", + }, + ) + self.validate_all( + "WITH RECURSIVE t(n, _c_0) AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t", + read={ + "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t", + }, + ) + self.validate_all( + 'WITH RECURSIVE t(n, "1") AS (SELECT n, 1 FROM tbl) SELECT * FROM t', + read={ + "postgres": "WITH RECURSIVE t AS (SELECT n, 1 FROM tbl) SELECT * FROM t", + }, + ) + self.validate_all( "SELECT JSON_OBJECT(KEY 'key1' VALUE 1, KEY 'key2' VALUE TRUE)", write={ "presto": "SELECT 
JSON_OBJECT('key1': 1, 'key2': TRUE)", @@ -757,3 +787,27 @@ class TestPresto(Validator): "SELECT col, pos, pos_2, col_2 FROM _u CROSS JOIN UNNEST(SEQUENCE(2, 3)) WITH ORDINALITY AS _u_2(col_2, pos_2)", read={"spark": "SELECT col, pos, POSEXPLODE(SEQUENCE(2, 3)) FROM _u"}, ) + + def test_match_recognize(self): + self.validate_identity( + """SELECT + * +FROM orders +MATCH_RECOGNIZE ( + PARTITION BY custkey + ORDER BY + orderdate + MEASURES + A.totalprice AS starting_price, + LAST(B.totalprice) AS bottom_price, + LAST(C.totalprice) AS top_price + ONE ROW PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C+ D+) + DEFINE + B AS totalprice < PREV(totalprice), + C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice, + D AS totalprice > PREV(totalprice) +)""", + pretty=True, + ) diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index f75480e..6707b7a 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -11,10 +11,10 @@ class TestRedshift(Validator): self.validate_identity("$foo") self.validate_all( - "SELECT SNAPSHOT", + "SELECT SNAPSHOT, type", write={ - "": "SELECT SNAPSHOT", - "redshift": 'SELECT "SNAPSHOT"', + "": "SELECT SNAPSHOT, type", + "redshift": 'SELECT "SNAPSHOT", "type"', }, ) @@ -31,7 +31,7 @@ class TestRedshift(Validator): write={ "postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", "redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", - "snowflake": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMPNTZ))", + "snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMPNTZ))", }, ) self.validate_all( @@ -39,10 +39,10 @@ class TestRedshift(Validator): write={ "postgres": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "redshift": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", - "snowflake": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", + "snowflake": "SELECT DATE_PART(month, CAST('20220502' AS DATE))", }, ) - self.validate_all("SELECT INTERVAL '5 day'", read={"": "SELECT INTERVAL '5' days"}) + self.validate_all("SELECT INTERVAL '5 days'", read={"": "SELECT INTERVAL '5' days"}) self.validate_all("CONVERT(INTEGER, x)", write={"redshift": "CAST(x AS INTEGER)"}) self.validate_all( "DATEADD('day', ndays, caldate)", write={"redshift": "DATEADD(day, ndays, caldate)"} @@ -65,7 +65,7 @@ class TestRedshift(Validator): "SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4326;POINT(10 20)')::geography)", write={ "redshift": "SELECT ST_ASEWKT(CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", - "bigquery": "SELECT ST_ASEWKT(TRY_CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", + "bigquery": "SELECT ST_ASEWKT(SAFE_CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", }, ) self.validate_all( @@ -182,6 +182,16 @@ class TestRedshift(Validator): def test_values(self): self.validate_all( + "SELECT * FROM (VALUES (1, 2)) AS t", + write={"redshift": "SELECT * FROM (SELECT 1, 2) AS t"}, + ) + self.validate_all( + "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", + write={ + "redshift": "SELECT * FROM (SELECT 1 AS id) AS t1 CROSS JOIN (SELECT 1 AS id) AS t2", + }, + ) + self.validate_all( "SELECT a, b FROM (VALUES (1, 2)) AS t (a, b)", write={ "redshift": "SELECT a, b FROM (SELECT 1 AS a, 2 AS b) AS t", diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 57ee235..941f2aa 100644 --- 
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -6,6 +6,8 @@ class TestSnowflake(Validator):
     dialect = "snowflake"
 
     def test_snowflake(self):
+        self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')")
+        self.validate_identity("CAST(x AS GEOMETRY)")
         self.validate_identity("OBJECT_CONSTRUCT(*)")
         self.validate_identity("SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'")
         self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'")
@@ -25,7 +27,21 @@ class TestSnowflake(Validator):
             'COPY INTO NEW_TABLE ("foo", "bar") FROM (SELECT $1, $2, $3, $4 FROM @%old_table)'
         )
         self.validate_identity("COMMENT IF EXISTS ON TABLE foo IS 'bar'")
+        self.validate_identity("SELECT CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', col)")
 
+        self.validate_all("CAST(x AS CHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
+        self.validate_all("CAST(x AS CHARACTER VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
+        self.validate_all("CAST(x AS NCHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
+        self.validate_all(
+            "OBJECT_CONSTRUCT(a, b, c, d)",
+            read={
+                "": "STRUCT(a as b, c as d)",
+            },
+            write={
+                "duckdb": "{'a': b, 'c': d}",
+                "snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
+            },
+        )
         self.validate_all(
             "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1",
             write={
@@ -284,7 +300,7 @@ class TestSnowflake(Validator):
         self.validate_all(
             "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
             write={
-                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST",
+                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
                 "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname",
                 "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
                 "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST",
@@ -458,7 +474,8 @@ class TestSnowflake(Validator):
         )
 
     def test_timestamps(self):
-        self.validate_identity("SELECT EXTRACT(month FROM a)")
+        self.validate_identity("SELECT CAST('12:00:00' AS TIME)")
+        self.validate_identity("SELECT DATE_PART(month, a)")
 
         self.validate_all(
             "SELECT CAST(a AS TIMESTAMP)",
@@ -487,19 +504,19 @@ class TestSnowflake(Validator):
         self.validate_all(
             "SELECT EXTRACT('month', a)",
             write={
-                "snowflake": "SELECT EXTRACT('month' FROM a)",
+                "snowflake": "SELECT DATE_PART('month', a)",
             },
         )
         self.validate_all(
             "SELECT DATE_PART('month', a)",
             write={
-                "snowflake": "SELECT EXTRACT('month' FROM a)",
+                "snowflake": "SELECT DATE_PART('month', a)",
             },
         )
         self.validate_all(
             "SELECT DATE_PART(month, a::DATETIME)",
             write={
-                "snowflake": "SELECT EXTRACT(month FROM CAST(a AS DATETIME))",
+                "snowflake": "SELECT DATE_PART(month, CAST(a AS DATETIME))",
             },
         )
         self.validate_all(
@@ -554,10 +571,23 @@ class TestSnowflake(Validator):
         )
 
     def test_ddl(self):
+        self.validate_identity("CREATE TABLE geospatial_table (id INT, g GEOGRAPHY)")
+        self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x")
+        self.validate_identity("CREATE DATABASE mytestdb_clone CLONE mytestdb")
+        self.validate_identity("CREATE SCHEMA mytestschema_clone CLONE testschema")
+        self.validate_identity("CREATE TABLE orders_clone CLONE orders")
+        self.validate_identity(
+            "CREATE TABLE orders_clone_restore CLONE orders AT (TIMESTAMP => TO_TIMESTAMP_TZ('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss'))"
+        )
+        self.validate_identity(
+            "CREATE TABLE orders_clone_restore CLONE orders BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')"
+        )
         self.validate_identity(
             "CREATE TABLE a (x DATE, y BIGINT) WITH (PARTITION BY (x), integration='q', auto_refresh=TRUE, file_format=(type = parquet))"
         )
-        self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x")
+        self.validate_identity(
+            "CREATE SCHEMA mytestschema_clone_restore CLONE testschema BEFORE (TIMESTAMP => TO_TIMESTAMP(40 * 365 * 86400))"
+        )
 
         self.validate_all(
             "CREATE OR REPLACE TRANSIENT TABLE a (id INT)",
@@ -758,7 +788,7 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f, LATERA
 
     def test_values(self):
         self.validate_all(
-            'SELECT c0, c1 FROM (VALUES (1, 2), (3, 4)) AS "t0"(c0, c1)',
+            'SELECT "c0", "c1" FROM (VALUES (1, 2), (3, 4)) AS "t0"("c0", "c1")',
             read={
                 "spark": "SELECT `c0`, `c1` FROM (VALUES (1, 2), (3, 4)) AS `t0`(`c0`, `c1`)",
             },
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
index be03b4e..bcfd984 100644
--- a/tests/dialects/test_spark.py
+++ b/tests/dialects/test_spark.py
@@ -39,8 +39,8 @@ class TestSpark(Validator):
             "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
             write={
                 "duckdb": "CREATE TABLE x",
-                "presto": "CREATE TABLE x WITH (TABLE_FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
-                "hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
+                "presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
+                "hive": "CREATE TABLE x STORED AS ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
                 "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
             },
         )
@@ -113,6 +113,13 @@ TBLPROPERTIES (
                 "spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
             },
         )
+        self.validate_all(
+            "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
+            identify=True,
+            write={
+                "spark": "CREATE TABLE `x` USING ICEBERG PARTITIONED BY (MONTHS(`y`)) LOCATION 's3://z'",
+            },
+        )
 
     def test_to_date(self):
         self.validate_all(
@@ -207,6 +214,7 @@ TBLPROPERTIES (
         )
 
     def test_spark(self):
+        self.validate_identity("INTERVAL -86 days")
         self.validate_identity("SELECT UNIX_TIMESTAMP()")
         self.validate_identity("TRIM(' SparkSQL ')")
         self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
@@ -215,6 +223,18 @@ TBLPROPERTIES (
         self.validate_identity("SPLIT(str, pattern, lim)")
 
         self.validate_all(
+            "SELECT piv.Q1 FROM (SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv",
+            read={
+                "snowflake": "SELECT piv.Q1 FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv",
+            },
+        )
+        self.validate_all(
+            "SELECT piv.Q1 FROM (SELECT * FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv",
+            read={
+                "snowflake": "SELECT piv.Q1 FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv",
+            },
+        )
+        self.validate_all(
             "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR quarter IN ('Q1', 'Q2'))",
             read={
                 "snowflake": "SELECT * FROM produce PIVOT (SUM(produce.sales) FOR produce.quarter IN ('Q1', 'Q2'))",
             },
         )
@@ -301,7 +321,7 @@ TBLPROPERTIES (
             "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
             write={
                 "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
-                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
+                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
                 "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname NULLS FIRST",
                 "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
                 "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py
index dcb513d..03eb02f 100644
--- a/tests/dialects/test_teradata.py
+++ b/tests/dialects/test_teradata.py
@@ -24,6 +24,13 @@ class TestTeradata(Validator):
 
     def test_create(self):
         self.validate_identity("CREATE TABLE x (y INT) PRIMARY INDEX (y) PARTITION BY y INDEX (y)")
+        self.validate_identity("CREATE TABLE x (y INT) PARTITION BY y INDEX (y)")
+        self.validate_identity(
+            "CREATE MULTISET VOLATILE TABLE my_table (id INT) PRIMARY INDEX (id) ON COMMIT PRESERVE ROWS"
+        )
+        self.validate_identity(
+            "CREATE SET VOLATILE TABLE my_table (id INT) PRIMARY INDEX (id) ON COMMIT DELETE ROWS"
+        )
         self.validate_identity(
             "CREATE TABLE a (b INT) PRIMARY INDEX (y) PARTITION BY RANGE_N(b BETWEEN 'a', 'b' AND 'c' EACH '1')"
         )
@@ -35,10 +42,20 @@ class TestTeradata(Validator):
         )
 
         self.validate_all(
+            """
+            CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL,
+            CHECKSUM = DEFAULT (x INT, y INT, z CHAR(30), a INT, b DATE, e INT)
+            PRIMARY INDEX (a),
+            INDEX(x, y)
+            """,
+            write={
+                "teradata": "CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL, CHECKSUM=DEFAULT (x INT, y INT, z CHAR(30), a INT, b DATE, e INT) PRIMARY INDEX (a) INDEX (x, y)",
+            },
+        )
+        self.validate_all(
             "REPLACE VIEW a AS (SELECT b FROM c)",
             write={"teradata": "CREATE OR REPLACE VIEW a AS (SELECT b FROM c)"},
         )
-
         self.validate_all(
             "CREATE VOLATILE TABLE a",
             write={
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index 3a3ac73..8789aed 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -467,6 +467,11 @@ WHERE
 
     def test_add_date(self):
         self.validate_identity("SELECT DATEADD(year, 1, '2017/08/25')")
+
+        self.validate_all(
+            "DATEADD(year, 50, '2006-07-31')",
+            write={"bigquery": "DATE_ADD('2006-07-31', INTERVAL 50 YEAR)"},
+        )
         self.validate_all(
             "SELECT DATEADD(year, 1, '2017/08/25')",
             write={"spark": "SELECT ADD_MONTHS('2017/08/25', 12)"},
@@ -525,7 +530,7 @@ WHERE
         self.validate_all(
             "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)",
             write={
-                "spark": "SELECT x.a, x.b, t.v, t.y FROM x JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)",
+                "spark": "SELECT x.a, x.b, t.v, t.y FROM x, LATERAL (SELECT v, y FROM t) AS t(v, y)",
             },
         )
         self.validate_all(
@@ -545,7 +550,7 @@ WHERE
         self.validate_all(
             "SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x)y(z)",
             write={
-                "spark": "SELECT t.x, y.z FROM x JOIN LATERAL TVFTEST(t.x) AS y(z)",
+                "spark": "SELECT t.x, y.z FROM x, LATERAL TVFTEST(t.x) AS y(z)",
             },
         )
         self.validate_all(
@@ -637,7 +642,7 @@ WHERE
         self.assertIsInstance(expr.this, exp.Var)
         self.assertEqual(expr.sql("tsql"), "@x")
 
-        table = parse_one("select * from @x", read="tsql").args["from"].expressions[0]
+        table = parse_one("select * from @x", read="tsql").args["from"].this
         self.assertIsInstance(table, exp.Table)
         self.assertIsInstance(table.this, exp.Parameter)
         self.assertIsInstance(table.this.this, exp.Var)
@@ -724,3 +729,44 @@ WHERE
             },
         )
         self.validate_identity("SELECT x FROM a INNER LOOP JOIN b ON b.id = a.id")
+
+    def test_openjson(self):
+        self.validate_identity("SELECT * FROM OPENJSON(@json)")
+
+        self.validate_all(
+            """SELECT [key], value FROM OPENJSON(@json,'$.path.to."sub-object"')""",
+            write={
+                "tsql": """SELECT "key", value FROM OPENJSON(@json, '$.path.to."sub-object"')""",
+            },
+        )
+        self.validate_all(
+            "SELECT * FROM OPENJSON(@array) WITH (month VARCHAR(3), temp int, month_id tinyint '$.sql:identity()') as months",
+            write={
+                "tsql": "SELECT * FROM OPENJSON(@array) WITH (month VARCHAR(3), temp INTEGER, month_id TINYINT '$.sql:identity()') AS months",
+            },
+        )
+        self.validate_all(
+            """
+            SELECT *
+            FROM OPENJSON ( @json )
+            WITH (
+                Number VARCHAR(200) '$.Order.Number',
+                Date DATETIME '$.Order.Date',
+                Customer VARCHAR(200) '$.AccountNumber',
+                Quantity INT '$.Item.Quantity',
+                [Order] NVARCHAR(MAX) AS JSON
+            )
+            """,
+            write={
+                "tsql": """SELECT
+  *
+FROM OPENJSON(@json) WITH (
+  Number VARCHAR(200) '$.Order.Number',
+  Date DATETIME2 '$.Order.Date',
+  Customer VARCHAR(200) '$.AccountNumber',
+  Quantity INTEGER '$.Item.Quantity',
+  "Order" TEXT AS JSON
+)"""
+            },
+            pretty=True,
+        )
diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql
index ea695c9..0a1e305 100644
--- a/tests/fixtures/identity.sql
+++ b/tests/fixtures/identity.sql
@@ -174,6 +174,8 @@ SET variable = value
 SET GLOBAL variable = value
 SET LOCAL variable = value
 SET @user OFF
+@x
+@"x"
 COMMIT
 USE db
 USE role x
@@ -183,6 +185,7 @@ USE schema x.y
 NOT 1
 NOT NOT 1
 SELECT * FROM test
+SELECT * FROM db.FOO()
 SELECT *, 1 FROM test
 SELECT * FROM a.b
 SELECT * FROM a.b.c
@@ -288,10 +291,6 @@ SELECT a FROM test ORDER BY a, b
 SELECT x FROM tests ORDER BY a DESC, b DESC, c
 SELECT a FROM test ORDER BY a > 1
 SELECT * FROM test ORDER BY DATE DESC, TIMESTAMP DESC
-SELECT * FROM test DISTRIBUTE BY y SORT BY x DESC ORDER BY l
-SELECT * FROM test CLUSTER BY y
-SELECT * FROM test CLUSTER BY y
-SELECT * FROM test WHERE RAND() <= 0.1 DISTRIBUTE BY RAND() SORT BY RAND()
 SELECT a, b FROM test GROUP BY 1
 SELECT a, b FROM test GROUP BY a
 SELECT a, b FROM test WHERE a = 1 GROUP BY a HAVING a = 2
@@ -414,7 +413,7 @@ SELECT 1 AS delete, 2 AS alter
 SELECT * FROM (x)
 SELECT * FROM ((x))
 SELECT * FROM ((SELECT 1))
-SELECT * FROM (x LATERAL VIEW EXPLODE(y) JOIN foo)
+SELECT * FROM (x CROSS JOIN foo LATERAL VIEW EXPLODE(y))
 SELECT * FROM (SELECT 1) AS x
 SELECT * FROM (SELECT 1 UNION SELECT 2) AS x
 SELECT * FROM (SELECT 1 UNION ALL SELECT 2) AS x
@@ -447,10 +446,6 @@ SELECT 1 UNION (SELECT 2)
 (SELECT 1) ORDER BY x LIMIT 1 OFFSET 1
 (SELECT 1 UNION SELECT 2) UNION (SELECT 2 UNION ALL SELECT 3)
 (SELECT 1 UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1
-(SELECT 1 UNION SELECT 2) CLUSTER BY y DESC
-(SELECT 1 UNION SELECT 2) SORT BY z
-(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z
-(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x
 SELECT 1 UNION (SELECT 2) ORDER BY x
 (SELECT 1) UNION SELECT 2 ORDER BY x
 SELECT * FROM (((SELECT 1) UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1)
@@ -563,6 +558,7 @@ CREATE TABLE foo (bar INT REFERENCES baz(baz_id) ON UPDATE SET DEFAULT)
 CREATE TABLE asd AS SELECT asd FROM asd WITH NO DATA
 CREATE TABLE asd AS SELECT asd FROM asd WITH DATA
 CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY)
+CREATE TABLE products (x INT GENERATED BY DEFAULT ON NULL AS IDENTITY)
 CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY)
 CREATE TABLE konyvszerzo (szerzo_azon INT CONSTRAINT konyvszerzo_szerzo_fk REFERENCES szerzo)
 CREATE TABLE IF NOT EXISTS customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1))
@@ -610,6 +606,7 @@ CREATE FUNCTION a.b(x INT) RETURNS INT AS RETURN x + 1
 CREATE FUNCTION a.b.c()
 CREATE INDEX abc ON t (a)
 CREATE INDEX abc ON t (a, b, b)
+CREATE INDEX abc ON t (a NULLS LAST)
 CREATE UNIQUE INDEX abc ON t (a, b, b)
 CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl (a, b)
 CREATE SCHEMA x
@@ -685,6 +682,7 @@ INSERT OVERWRITE TABLE a.b IF EXISTS SELECT * FROM y
 INSERT OVERWRITE DIRECTORY 'x' SELECT 1
 INSERT OVERWRITE LOCAL DIRECTORY 'x' SELECT 1
 INSERT OVERWRITE LOCAL DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' COLLECTION ITEMS TERMINATED BY '2' MAP KEYS TERMINATED BY '3' LINES TERMINATED BY '4' NULL DEFINED AS '5' SELECT 1
+LOAD foo
 LOAD DATA INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy')
 LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy')
 LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') INPUTFORMAT 'y'
@@ -721,9 +719,9 @@ SELECT ((SELECT 1) + 1)
 SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES
 SELECT * FROM (table1 AS t1 LEFT JOIN table2 AS t2 ON 1 = 1)
 SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1)
-SELECT * FROM (tbl1 JOIN tbl2 JOIN tbl3)
-SELECT * FROM (tbl1 JOIN (tbl2 JOIN tbl3) ON bla = foo)
-SELECT * FROM (tbl1 JOIN LATERAL (SELECT * FROM bla) AS tbl)
+SELECT * FROM (tbl1, tbl2 JOIN tbl3 ON TRUE)
+SELECT * FROM (tbl1 JOIN (tbl2 CROSS JOIN tbl3) ON bla = foo)
+SELECT * FROM (tbl1, LATERAL (SELECT * FROM bla) AS tbl)
 SELECT CAST(x AS INT) /* comment */ FROM foo
 SELECT a /* x */, b /* x */
 SELECT a /* x */ /* y */ /* z */, b /* k */ /* m */
@@ -826,3 +824,5 @@ SELECT NEXT VALUE FOR db.schema.sequence_name OVER (ORDER BY foo), col
 SELECT PERCENTILE_CONT(x, 0.5) OVER ()
 SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()
 SELECT PERCENTILE_CONT(x, 0.5 IGNORE NULLS) OVER ()
+WITH my_cte AS (SELECT 'a' AS desc) SELECT desc AS description FROM my_cte
+WITH my_cte AS (SELECT 'a' AS asc) SELECT asc AS description FROM my_cte
diff --git a/tests/fixtures/optimizer/canonicalize.sql b/tests/fixtures/optimizer/canonicalize.sql
index ccf2f16..1fc44ef 100644
--- a/tests/fixtures/optimizer/canonicalize.sql
+++ b/tests/fixtures/optimizer/canonicalize.sql
@@ -10,8 +10,8 @@ SELECT CAST(1 AS VARCHAR) AS "a" FROM "w" AS "w";
 SELECT CAST(1 + 3.2 AS DOUBLE) AS a FROM w AS w;
 SELECT 1 + 3.2 AS "a" FROM "w" AS "w";
 
-SELECT CAST("2022-01-01" AS DATE) + INTERVAL '1' day;
-SELECT CAST("2022-01-01" AS DATE) + INTERVAL '1' day AS "_col_0";
+SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' day;
+SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' day AS "_col_0";
 
 --------------------------------------
 -- Ensure boolean predicates
diff --git a/tests/fixtures/optimizer/eliminate_subqueries.sql b/tests/fixtures/optimizer/eliminate_subqueries.sql
index 4fa63dd..4c79c22 100644
--- a/tests/fixtures/optimizer/eliminate_subqueries.sql
+++ b/tests/fixtures/optimizer/eliminate_subqueries.sql
@@ -35,8 +35,8 @@ SELECT * FROM (SELECT * FROM (SELECT a FROM x) AS x) AS y JOIN (SELECT * FROM x)
 WITH x_2 AS (SELECT a FROM x), y AS (SELECT * FROM x_2 AS x), z AS (SELECT * FROM x) SELECT * FROM y AS y JOIN z AS z ON x.a = y.a;
 
 -- Name conflicts with table alias
-SELECT a FROM (SELECT a FROM (SELECT a FROM x) AS y) AS z JOIN q AS y;
-WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z JOIN q AS y;
+SELECT a FROM (SELECT a FROM (SELECT a FROM x) AS y) AS z CROSS JOIN q AS y;
+WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z CROSS JOIN q AS y;
 
 -- Name conflicts with existing CTE
 WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y;
@@ -63,12 +63,12 @@ SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
 SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
 
 -- Duplicate CTE
-SELECT a FROM (SELECT b FROM x) AS y JOIN (SELECT b FROM x) AS z;
-WITH y AS (SELECT b FROM x) SELECT a FROM y AS y JOIN y AS z;
+SELECT a FROM (SELECT b FROM x) AS y CROSS JOIN (SELECT b FROM x) AS z;
+WITH y AS (SELECT b FROM x) SELECT a FROM y AS y CROSS JOIN y AS z;
 
 -- Doubly duplicate CTE
 SELECT * FROM (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS z JOIN (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS q;
-WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x JOIN y AS y) SELECT * FROM z AS z JOIN z AS q;
+WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x, y AS y) SELECT * FROM z AS z, z AS q;
 
 -- Another duplicate...
 SELECT x.id FROM (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS x JOIN (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS y ON x.id = y.id;
@@ -79,8 +79,8 @@ WITH x_2 AS (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) SELECT x.id FROM x
 (WITH cte AS (SELECT * FROM x) SELECT * FROM cte AS cte) LIMIT 1;
 
 -- Existing duplicate CTE
-WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y JOIN y AS z;
-WITH y AS (SELECT a FROM x) SELECT a FROM y AS y JOIN y AS z;
+WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y CROSS JOIN y AS z;
+WITH y AS (SELECT a FROM x) SELECT a FROM y AS y CROSS JOIN y AS z;
 
 -- Nested CTE
 WITH cte1 AS (SELECT a FROM x) SELECT a FROM (WITH cte2 AS (SELECT a FROM cte1) SELECT a FROM cte2);
diff --git a/tests/fixtures/optimizer/expand_laterals.sql b/tests/fixtures/optimizer/expand_laterals.sql
deleted file mode 100644
index 09bbd0f..0000000
--- a/tests/fixtures/optimizer/expand_laterals.sql
+++ /dev/null
@@ -1,40 +0,0 @@
-# title: expand alias reference
-SELECT
-  x.a + 1 AS i,
-  i + 1 AS j,
-  j + 1 AS k
-FROM x;
-SELECT
-  x.a + 1 AS i,
-  x.a + 1 + 1 AS j,
-  x.a + 1 + 1 + 1 AS k
-FROM x;
-
-# title: noop - reference comes before alias
-SELECT
-  b + 1 AS j,
-  x.a + 1 AS i
-FROM x;
-SELECT
-  b + 1 AS j,
-  x.a + 1 AS i
-FROM x;
-
-
-# title: subquery
-SELECT
-  *
-FROM (
-  SELECT
-    x.a + 1 AS i,
-    i + 1 AS j
-  FROM x
-);
-SELECT
-  *
-FROM (
-  SELECT
-    x.a + 1 AS i,
-    x.a + 1 + 1 AS j
-  FROM x
-);
diff --git a/tests/fixtures/optimizer/expand_multi_table_selects.sql b/tests/fixtures/optimizer/expand_multi_table_selects.sql
deleted file mode 100644
index a5a4664..0000000
--- a/tests/fixtures/optimizer/expand_multi_table_selects.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---------------------------------------
--- Multi Table Selects
---------------------------------------
-SELECT * FROM x AS x, y AS y WHERE x.a = y.a;
-SELECT * FROM x AS x CROSS JOIN y AS y WHERE x.a = y.a;
-
-SELECT * FROM x AS x, y AS y WHERE x.a = y.a AND x.a = 1 and y.b = 1;
-SELECT * FROM x AS x CROSS JOIN y AS y WHERE x.a = y.a AND x.a = 1 AND y.b = 1;
-
-SELECT * FROM x AS x, y AS y WHERE x.a > y.a;
-SELECT * FROM x AS x CROSS JOIN y AS y WHERE x.a > y.a;
diff --git a/tests/fixtures/optimizer/isolate_table_selects.sql b/tests/fixtures/optimizer/isolate_table_selects.sql
index 93c0f7c..43540e8 100644
--- a/tests/fixtures/optimizer/isolate_table_selects.sql
+++ b/tests/fixtures/optimizer/isolate_table_selects.sql
@@ -4,20 +4,20 @@ SELECT * FROM (SELECT * FROM x AS x) AS x, (SELECT * FROM y AS y) AS y2;
 SELECT * FROM x AS x WHERE x = 1;
 SELECT * FROM x AS x WHERE x = 1;
 
-SELECT * FROM x AS x JOIN y AS y;
-SELECT * FROM (SELECT * FROM x AS x) AS x JOIN (SELECT * FROM y AS y) AS y;
+SELECT * FROM x AS x CROSS JOIN y AS y;
+SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN (SELECT * FROM y AS y) AS y;
 
-SELECT * FROM (SELECT 1) AS x JOIN y AS y;
-SELECT * FROM (SELECT 1) AS x JOIN (SELECT * FROM y AS y) AS y;
+SELECT * FROM (SELECT 1) AS x CROSS JOIN y AS y;
+SELECT * FROM (SELECT 1) AS x CROSS JOIN (SELECT * FROM y AS y) AS y;
 
 SELECT * FROM x AS x JOIN (SELECT * FROM y) AS y;
-SELECT * FROM (SELECT * FROM x AS x) AS x JOIN (SELECT * FROM y) AS y;
+SELECT * FROM (SELECT * FROM x AS x) AS x, (SELECT * FROM y) AS y;
 
 WITH y AS (SELECT *) SELECT * FROM x AS x;
 WITH y AS (SELECT *) SELECT * FROM x AS x;
 
-WITH y AS (SELECT * FROM y AS y2 JOIN x AS z2) SELECT * FROM x AS x JOIN y as y;
-WITH y AS (SELECT * FROM (SELECT * FROM y AS y) AS y2 JOIN (SELECT * FROM x AS x) AS z2) SELECT * FROM (SELECT * FROM x AS x) AS x JOIN y AS y;
+WITH y AS (SELECT * FROM y AS y2 CROSS JOIN x AS z2) SELECT * FROM x AS x CROSS JOIN y as y;
+WITH y AS (SELECT * FROM (SELECT * FROM y AS y) AS y2 CROSS JOIN (SELECT * FROM x AS x) AS z2) SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN y AS y;
 
-SELECT * FROM x AS x JOIN xx AS y;
-SELECT * FROM (SELECT * FROM x AS x) AS x JOIN xx AS y;
+SELECT * FROM x AS x CROSS JOIN xx AS y;
+SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN xx AS y;
diff --git a/tests/fixtures/optimizer/merge_subqueries.sql b/tests/fixtures/optimizer/merge_subqueries.sql
index 4c06e42..1124a79 100644
--- a/tests/fixtures/optimizer/merge_subqueries.sql
+++ b/tests/fixtures/optimizer/merge_subqueries.sql
@@ -48,8 +48,8 @@ SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b =
 SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b;
 
 # title: WHERE clause in joined derived table is merged to ON clause
-SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y ON x.b = y.b;
-SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b AND y.c > 1;
+SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y ON x.b = y.b ORDER BY x.a;
+SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b AND y.c > 1 ORDER BY x.a;
 
 # title: Comma JOIN in outer query
 SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y;
@@ -57,7 +57,7 @@ SELECT x.a AS a, y.c AS c FROM x AS x, y AS y;
 
 # title: Comma JOIN in inner query
 SELECT x.a, x.c FROM (SELECT x.a, z.c FROM x, y AS z) AS x;
-SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z;
+SELECT x.a AS a, z.c AS c FROM x AS x, y AS z;
 
 # title: (Regression) Column in ORDER BY
 SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1;
diff --git a/tests/fixtures/optimizer/normalize.sql b/tests/fixtures/optimizer/normalize.sql
index 803a474..12bc388 100644
--- a/tests/fixtures/optimizer/normalize.sql
+++ b/tests/fixtures/optimizer/normalize.sql
@@ -42,3 +42,6 @@ A OR ((((B OR C) AND (B OR D)) OR C) AND (((B OR C) AND (B OR D)) OR D));
 
 SELECT * FROM x WHERE (A AND B) OR C;
 SELECT * FROM x WHERE (A OR C) AND (B OR C);
+
+dt2 between '2022-01-01 12:00:00' and '2022-12-31' and dt2 >= '2022-05-01 12:00:00' or dt2 = '2021-06-01 12:00:00';
+(dt2 <= '2022-12-31' OR dt2 = '2021-06-01 12:00:00') AND (dt2 =
'2021-06-01 12:00:00' OR dt2 >= '2022-01-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-05-01 12:00:00') diff --git a/tests/fixtures/optimizer/lower_identities.sql b/tests/fixtures/optimizer/normalize_identifiers.sql index cea346f..ddb755f 100644 --- a/tests/fixtures/optimizer/lower_identities.sql +++ b/tests/fixtures/optimizer/normalize_identifiers.sql @@ -1,11 +1,19 @@ SELECT a FROM x; SELECT a FROM x; +# dialect: snowflake +SELECT A FROM X; +SELECT A FROM X; + SELECT "A" FROM "X"; SELECT "A" FROM "X"; SELECT a AS A FROM x; -SELECT a AS A FROM x; +SELECT a AS a FROM x; + +# dialect: snowflake +SELECT A AS a FROM X; +SELECT A AS A FROM X; SELECT * FROM x; SELECT * FROM x; @@ -13,29 +21,37 @@ SELECT * FROM x; SELECT A FROM x; SELECT a FROM x; +# dialect: snowflake +SELECT a FROM X; +SELECT A FROM X; + SELECT a FROM X; SELECT a FROM x; +# dialect: snowflake +SELECT A FROM x; +SELECT A FROM X; + SELECT A AS A FROM (SELECT a AS A FROM x); -SELECT a AS A FROM (SELECT a AS a FROM x); +SELECT a AS a FROM (SELECT a AS a FROM x); SELECT a AS B FROM x ORDER BY B; -SELECT a AS B FROM x ORDER BY B; +SELECT a AS b FROM x ORDER BY b; SELECT A FROM x ORDER BY A; SELECT a FROM x ORDER BY a; SELECT A AS B FROM X GROUP BY A HAVING SUM(B) > 0; -SELECT a AS B FROM x GROUP BY a HAVING SUM(b) > 0; +SELECT a AS b FROM x GROUP BY a HAVING SUM(b) > 0; SELECT A AS B, SUM(B) AS C FROM X GROUP BY A HAVING C > 0; -SELECT a AS B, SUM(b) AS C FROM x GROUP BY a HAVING C > 0; +SELECT a AS b, SUM(b) AS c FROM x GROUP BY a HAVING c > 0; SELECT A FROM X UNION SELECT A FROM X; SELECT a FROM x UNION SELECT a FROM x; SELECT A AS A FROM X UNION SELECT A AS A FROM X; -SELECT a AS A FROM x UNION SELECT a AS A FROM x; +SELECT a AS a FROM x UNION SELECT a AS a FROM x; (SELECT A AS A FROM X); -(SELECT a AS A FROM x); +(SELECT a AS a FROM x); diff --git a/tests/fixtures/optimizer/optimize_joins.sql b/tests/fixtures/optimizer/optimize_joins.sql index b64544e..b8e39c3 100644 --- a/tests/fixtures/optimizer/optimize_joins.sql +++ b/tests/fixtures/optimizer/optimize_joins.sql @@ -10,11 +10,23 @@ SELECT * FROM x JOIN z ON x.a = z.a AND TRUE JOIN y ON y.a = z.a; SELECT * FROM x LEFT JOIN y ON y.a = 1 JOIN z ON x.a = z.a AND y.a = z.a; SELECT * FROM x JOIN z ON x.a = z.a AND TRUE LEFT JOIN y ON y.a = 1 AND y.a = z.a; -SELECT * FROM x INNER JOIN z; -SELECT * FROM x JOIN z; +SELECT * FROM x INNER JOIN z ON x.id = z.id; +SELECT * FROM x JOIN z ON x.id = z.id; SELECT * FROM x LEFT OUTER JOIN z; SELECT * FROM x LEFT JOIN z; SELECT * FROM x CROSS JOIN z; SELECT * FROM x CROSS JOIN z; + +SELECT * FROM x JOIN z; +SELECT * FROM x CROSS JOIN z; + +SELECT * FROM x NATURAL JOIN z; +SELECT * FROM x NATURAL JOIN z; + +SELECT * FROM x RIGHT JOIN z; +SELECT * FROM x RIGHT JOIN z; + +SELECT * FROM x JOIN z USING (id); +SELECT * FROM x JOIN z USING (id); diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 9e7880c..e0567d7 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -101,10 +101,10 @@ SELECT "x"."a" AS "a", SUM("y"."b") AS "sum_b" FROM "x" AS "x" -LEFT JOIN "_u_0" AS "_u_0" - ON "x"."b" = "_u_0"."_u_1" JOIN "y" AS "y" ON "x"."b" = "y"."b" +LEFT JOIN "_u_0" AS "_u_0" + ON "x"."b" = "_u_0"."_u_1" WHERE "_u_0"."_col_0" >= 0 AND "x"."a" > 1 GROUP BY @@ -502,3 +502,120 @@ WHERE "unioned"."source_system" = 'bamboohr' OR "unioned"."source_system" = 'workday' QUALIFY ROW_NUMBER() OVER (PARTITION BY "unioned"."unique_filter_key" ORDER BY 
"unioned"."sort_order" DESC, 1) = 1; + +# title: pivoted source with explicit selections +# execute: false +SELECT * FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); +SELECT + "_q_1"."a" AS "a", + "_q_1"."x" AS "x", + "_q_1"."y" AS "y", + "_q_1"."z" AS "z" +FROM ( + SELECT + "tb"."a" AS "a", + "tb"."b" AS "b", + "tb"."c" AS "c" + FROM "sc"."tb" AS "tb" +) AS "_q_0" PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; + +# title: pivoted source with implicit selections +# execute: false +SELECT * FROM (SELECT * FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + "_q_1"."g" AS "g", + "_q_1"."x" AS "x", + "_q_1"."y" AS "y" +FROM ( + SELECT + "u"."f" AS "f", + "u"."g" AS "g", + "u"."h" AS "h" + FROM "u" AS "u" +) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; + +# title: selecting explicit qualified columns from pivoted source with explicit selections +# execute: false +SELECT piv.x, piv.y FROM (SELECT f, h FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')) AS piv; +SELECT + "piv"."x" AS "x", + "piv"."y" AS "y" +FROM ( + SELECT + "u"."f" AS "f", + "u"."h" AS "h" + FROM "u" AS "u" +) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; + +# title: selecting explicit unqualified columns from pivoted source with implicit selections +# execute: false +SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + "_q_0"."x" AS "x", + "_q_0"."y" AS "y" +FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; + +# title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery +# execute: false +# dialect: bigquery +WITH u_cte(f, g, h) AS (SELECT * FROM u) SELECT * FROM u_cte PIVOT(SUM(f) AS sum FOR h IN ('x', 'y')); +WITH `u_cte` AS ( + SELECT + `u`.`f` AS `f`, + `u`.`g` AS `g`, + `u`.`h` AS `h` + FROM `u` AS `u` +) +SELECT + `_q_0`.`g` AS `g`, + `_q_0`.`sum_x` AS `sum_x`, + `_q_0`.`sum_y` AS `sum_y` +FROM `u_cte` AS `u_cte` PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`; + +# title: selecting all columns from a pivoted source and generating snowflake +# execute: false +# dialect: snowflake +SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + "_q_0"."G" AS "G", + "_q_0"."'x'" AS "'x'", + "_q_0"."'y'" AS "'y'" +FROM "U" AS "U" PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0" +; + +# title: selecting all columns from a pivoted source and generating spark +# note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) +# execute: false +# dialect: spark +SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + `_q_0`.`g` AS `g`, + `_q_0`.`x` AS `x`, + `_q_0`.`y` AS `y` +FROM ( + SELECT + * + FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y')) +) AS `_q_0`; + +# title: quoting is maintained +# dialect: snowflake +with cte1("id", foo) as (select 1, 2) select "id" from cte1; +WITH "CTE1" AS ( + SELECT + 1 AS "id" +) +SELECT + "CTE1"."id" AS "id" +FROM "CTE1"; + +# title: ensures proper quoting happens after all optimizations +# execute: false +SELECT "foO".x FROM (SELECT 1 AS x) AS "foO"; +WITH "foO" AS ( + SELECT + 1 AS "x" +) +SELECT + "foO"."x" AS "x" +FROM "foO" AS "foO"; diff --git a/tests/fixtures/optimizer/pushdown_predicates.sql b/tests/fixtures/optimizer/pushdown_predicates.sql index dd318a2..83a353d 100644 --- a/tests/fixtures/optimizer/pushdown_predicates.sql +++ b/tests/fixtures/optimizer/pushdown_predicates.sql @@ -4,8 +4,8 @@ SELECT x.a AS a FROM (SELECT x.a FROM x 
AS x WHERE x.a = 1 AND x.b = 1) AS x JOI WITH x AS (SELECT y.a FROM y) SELECT * FROM x WHERE x.a = 1; WITH x AS (SELECT y.a FROM y WHERE y.a = 1) SELECT * FROM x WHERE TRUE; -SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE y.a = 1 OR (x.a = 1 AND x.b = 1); -SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = 1 AND x.b = 1) OR y.a = 1; +SELECT x.a FROM (SELECT * FROM x) AS x CROSS JOIN y WHERE y.a = 1 OR (x.a = 1 AND x.b = 1); +SELECT x.a FROM (SELECT * FROM x) AS x CROSS JOIN y WHERE (x.a = 1 AND x.b = 1) OR y.a = 1; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.a; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a WHERE TRUE; diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql index 6ff9383..0821339 100644 --- a/tests/fixtures/optimizer/pushdown_projections.sql +++ b/tests/fixtures/optimizer/pushdown_projections.sql @@ -61,9 +61,13 @@ SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); SELECT i.a FROM x AS i LEFT JOIN (SELECT a, b FROM (SELECT a, b FROM x)) AS j ON i.a = j.a; SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS j ON i.a = j.a; +WITH cte AS (SELECT source.a AS a, ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC) AS index FROM source AS source QUALIFY index) SELECT cte.a AS a FROM cte; +WITH cte AS (SELECT source.a AS a FROM source AS source QUALIFY ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC)) SELECT cte.a AS a FROM cte; + -------------------------------------- -- Unknown Star Expansion -------------------------------------- + SELECT a FROM (SELECT * FROM zz) WHERE b = 1; SELECT _q_0.a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS _q_0 WHERE _q_0.b = 1; diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index f077647..7be2c7f 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -5,7 +5,7 @@ SELECT a FROM x; SELECT x.a AS a FROM x AS x; SELECT "a" FROM x; -SELECT x."a" AS "a" FROM x AS x; +SELECT x.a AS a FROM x AS x; # execute: false SELECT a FROM zz GROUP BY a ORDER BY a; @@ -396,8 +396,39 @@ SELECT x.a + 1 AS a, ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a) AS row_nu # dialect: bigquery SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) AS row_num FROM x QUALIFY row_num = 1; -SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x QUALIFY row_num = 1; +SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x QUALIFY ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) = 1; # dialect: bigquery SELECT x.b, x.a FROM x LEFT JOIN y ON x.b = y.b QUALIFY ROW_NUMBER() OVER(PARTITION BY x.b ORDER BY x.a DESC) = 1; SELECT x.b AS b, x.a AS a FROM x AS x LEFT JOIN y AS y ON x.b = y.b QUALIFY ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a DESC) = 1; + +SELECT * FROM x QUALIFY COUNT(a) OVER (PARTITION BY b) > 1; +SELECT x.a AS a, x.b AS b FROM x AS x QUALIFY COUNT(x.a) OVER (PARTITION BY x.b) > 1; + +-------------------------------------- +-- Expand laterals +-------------------------------------- + +# title: expand alias reference +SELECT + x.a + 1 AS i, + i + 1 AS j, + j + 1 AS k +FROM x; +SELECT x.a + 1 AS i, x.a + 1 + 1 AS j, x.a + 1 + 1 + 1 AS k FROM x AS x; + +# title: noop - reference comes before alias +# execute: false +SELECT i + 1 AS j, x.a + 1 AS i 
FROM x; +SELECT i + 1 AS j, x.a + 1 AS i FROM x AS x; + +# title: subquery +SELECT + * +FROM ( + SELECT + x.a + 1 AS i, + i + 1 AS j + FROM x +); +SELECT _q_0.i AS i, _q_0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _q_0; diff --git a/tests/fixtures/optimizer/qualify_columns__invalid.sql b/tests/fixtures/optimizer/qualify_columns__invalid.sql index f0f9f87..d2d4959 100644 --- a/tests/fixtures/optimizer/qualify_columns__invalid.sql +++ b/tests/fixtures/optimizer/qualify_columns__invalid.sql @@ -1,7 +1,7 @@ SELECT z.a FROM x; SELECT z.* FROM x; SELECT x FROM x; -INSERT INTO x VALUES (1, 2); +SELECT x FROM VALUES (1, 2); SELECT a FROM x AS z JOIN y AS z; SELECT a FROM x JOIN (SELECT b FROM y WHERE y.b = x.c); SELECT a FROM x AS y JOIN (SELECT a FROM y) AS q ON y.a = q.a; diff --git a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql index 05253f3..1e07015 100644 --- a/tests/fixtures/optimizer/qualify_columns__with_invisible.sql +++ b/tests/fixtures/optimizer/qualify_columns__with_invisible.sql @@ -10,11 +10,11 @@ SELECT x.b AS b FROM x AS x; -------------------------------------- -- Derived tables -------------------------------------- -SELECT x.a FROM x AS x JOIN (SELECT * FROM x); -SELECT x.a AS a FROM x AS x JOIN (SELECT x.a AS a FROM x AS x) AS _q_0; +SELECT x.a FROM x AS x CROSS JOIN (SELECT * FROM x); +SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _q_0; -SELECT x.b FROM x AS x JOIN (SELECT b FROM x); -SELECT x.b AS b FROM x AS x JOIN (SELECT x.b AS b FROM x AS x) AS _q_0; +SELECT x.b FROM x AS x CROSS JOIN (SELECT b FROM x); +SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _q_0; -------------------------------------- -- Expand * @@ -22,11 +22,11 @@ SELECT x.b AS b FROM x AS x JOIN (SELECT x.b AS b FROM x AS x) AS _q_0; SELECT * FROM x; SELECT x.a AS a FROM x AS x; -SELECT * FROM y JOIN z ON y.b = z.b; -SELECT y.b AS b, z.b AS b FROM y AS y JOIN z AS z ON y.b = z.b; +SELECT * FROM y CROSS JOIN z ON y.b = z.b; +SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.b = z.b; -SELECT * FROM y JOIN z ON y.c = z.c; -SELECT y.b AS b, z.b AS b FROM y AS y JOIN z AS z ON y.c = z.c; +SELECT * FROM y CROSS JOIN z ON y.c = z.c; +SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.c = z.c; SELECT a FROM (SELECT * FROM x); SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0; diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 0ad155a..24d1b65 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -16,9 +16,12 @@ WITH a AS (SELECT 1 FROM c.db.z AS z) SELECT 1 FROM a; SELECT (SELECT y.c FROM y AS y) FROM x; SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x; -------------------------- +SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')); +SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _q_0; + +---------------------------- -- Expand join constructs -------------------------- +---------------------------- -- This is valid in Trino, so we treat the (tbl AS tbl) as a "join construct" per postgres' terminology. 
SELECT * FROM (tbl AS tbl) AS _q_0; diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql index a2cd859..5c8d371 100644 --- a/tests/fixtures/optimizer/simplify.sql +++ b/tests/fixtures/optimizer/simplify.sql @@ -201,6 +201,21 @@ A AND B AND C AND D; (((((A) AND B)) AND C)) AND D; A AND B AND C AND D; +(x + 1) + 2; +x + 3; + +x + (1 + 2); +x + 3; + +(x * 2) * 4 + (1 + 3) + 5; +x * 8 + 9; + +(x - 1) - 2; +(x - 1) - 2; + +x - (3 - 2); +x - 1; + -------------------------------------- -- Comparison and Pruning -------------------------------------- @@ -574,4 +589,4 @@ x > 3; TRUE; x = 2018 OR x <> 2018; -x <> 2018 OR x = 2018;
\ No newline at end of file +x <> 2018 OR x = 2018; diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 9908756..a6ee325 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -46,12 +46,12 @@ WITH "customer_total_return" AS ( SELECT "customer"."c_customer_id" AS "c_customer_id" FROM "customer_total_return" AS "ctr1" -LEFT JOIN "_u_0" AS "_u_0" - ON "ctr1"."ctr_store_sk" = "_u_0"."_u_1" JOIN "store" AS "store" ON "store"."s_state" = 'TN' AND "store"."s_store_sk" = "ctr1"."ctr_store_sk" JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" +LEFT JOIN "_u_0" AS "_u_0" + ON "ctr1"."ctr_store_sk" = "_u_0"."_u_1" WHERE "ctr1"."ctr_total_return" > "_u_0"."_col_0" ORDER BY @@ -211,9 +211,8 @@ JOIN "date_dim" AS "date_dim" ON "date_dim"."d_week_seq" = "wswscs"."d_week_seq" AND "date_dim"."d_year" = 1998 JOIN "wswscs" AS "wswscs_2" ON "wswscs"."d_week_seq" = "wswscs_2"."d_week_seq" - 53 -CROSS JOIN "date_dim" AS "date_dim_2" -WHERE - "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq" AND "date_dim_2"."d_year" = 1999 +JOIN "date_dim" AS "date_dim_2" + ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq" AND "date_dim_2"."d_year" = 1999 ORDER BY "d_week_seq1"; @@ -953,13 +952,13 @@ JOIN "date_dim" AS "date_dim" ON "date_dim"."d_month_seq" = "_u_0"."_col_0" JOIN "store_sales" AS "store_sales" ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" +JOIN "customer" AS "customer" + ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" + AND "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" JOIN "item" AS "item" ON "store_sales"."ss_item_sk" = "item"."i_item_sk" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."_u_2" = "item"."i_category" -JOIN "customer" AS "customer" - ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" - AND "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" WHERE "item"."i_current_price" > 1.2 * "_u_1"."_col_0" GROUP BY @@ -1256,9 +1255,9 @@ LIMIT 100; WITH "a1" AS ( SELECT SUBSTR("customer_address"."ca_zip", 1, 5) AS "ca_zip" - FROM "customer_address" AS "customer_address", "customer" AS "customer" - WHERE - "customer"."c_preferred_cust_flag" = 'Y' + FROM "customer_address" AS "customer_address" + JOIN "customer" AS "customer" + ON "customer"."c_preferred_cust_flag" = 'Y' AND "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" GROUP BY "customer_address"."ca_zip" @@ -1612,17 +1611,17 @@ SELECT "customer_demographics"."cd_dep_college_count" AS "cd_dep_college_count", COUNT(*) AS "cnt6" FROM "customer" AS "customer" +JOIN "customer_address" AS "customer_address" + ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" + AND "customer_address"."ca_county" IN ('Lycoming County', 'Sheridan County', 'Kandiyohi County', 'Pike County', 'Greene County') +JOIN "customer_demographics" AS "customer_demographics" + ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" LEFT JOIN "_u_0" AS "_u_0" ON "customer"."c_customer_sk" = "_u_0"."_u_1" LEFT JOIN "_u_2" AS "_u_2" ON "customer"."c_customer_sk" = "_u_2"."_u_3" LEFT JOIN "_u_4" AS "_u_4" ON "customer"."c_customer_sk" = "_u_4"."_u_5" -JOIN "customer_address" AS "customer_address" - ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" - AND "customer_address"."ca_county" IN ('Lycoming County', 'Sheridan County', 'Kandiyohi County', 'Pike County', 'Greene County') -JOIN 
"customer_demographics" AS "customer_demographics" - ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" WHERE NOT "_u_0"."_u_1" IS NULL AND ( @@ -2179,33 +2178,30 @@ WITH "item_2" AS ( "ics"."i_class_id" AS "i_class_id", "ics"."i_category_id" AS "i_category_id" FROM "catalog_sales" AS "catalog_sales" - CROSS JOIN "item_2" AS "ics" - CROSS JOIN "d1" AS "d2" - WHERE - "catalog_sales"."cs_item_sk" = "ics"."i_item_sk" - AND "catalog_sales"."cs_sold_date_sk" = "d2"."d_date_sk" + JOIN "item_2" AS "ics" + ON "catalog_sales"."cs_item_sk" = "ics"."i_item_sk" + JOIN "d1" AS "d2" + ON "catalog_sales"."cs_sold_date_sk" = "d2"."d_date_sk" INTERSECT SELECT "iws"."i_brand_id" AS "i_brand_id", "iws"."i_class_id" AS "i_class_id", "iws"."i_category_id" AS "i_category_id" FROM "web_sales" AS "web_sales" - CROSS JOIN "item_2" AS "iws" - CROSS JOIN "d1" AS "d3" - WHERE - "web_sales"."ws_item_sk" = "iws"."i_item_sk" - AND "web_sales"."ws_sold_date_sk" = "d3"."d_date_sk" + JOIN "item_2" AS "iws" + ON "web_sales"."ws_item_sk" = "iws"."i_item_sk" + JOIN "d1" AS "d3" + ON "web_sales"."ws_sold_date_sk" = "d3"."d_date_sk" ), "_q_0" AS ( SELECT "iss"."i_brand_id" AS "brand_id", "iss"."i_class_id" AS "class_id", "iss"."i_category_id" AS "category_id" FROM "store_sales" AS "store_sales" - CROSS JOIN "item_2" AS "iss" - CROSS JOIN "d1" AS "d1" - WHERE - "store_sales"."ss_item_sk" = "iss"."i_item_sk" - AND "store_sales"."ss_sold_date_sk" = "d1"."d_date_sk" + JOIN "item_2" AS "iss" + ON "store_sales"."ss_item_sk" = "iss"."i_item_sk" + JOIN "d1" AS "d1" + ON "store_sales"."ss_sold_date_sk" = "d1"."d_date_sk" INTERSECT SELECT "cte_4"."i_brand_id" AS "i_brand_id", @@ -2242,6 +2238,14 @@ WITH "item_2" AS ( SELECT AVG("x"."quantity" * "x"."list_price") AS "average_sales" FROM "x" AS "x" +), "date_dim_2" AS ( + SELECT + "date_dim"."d_date_sk" AS "d_date_sk", + "date_dim"."d_year" AS "d_year", + "date_dim"."d_moy" AS "d_moy" + FROM "date_dim" AS "date_dim" + WHERE + "date_dim"."d_moy" = 11 AND "date_dim"."d_year" = 2001 ), "_u_0" AS ( SELECT "item"."i_item_sk" AS "ss_item_sk" @@ -2256,14 +2260,6 @@ WITH "item_2" AS ( SELECT "avg_sales"."average_sales" AS "average_sales" FROM "avg_sales" -), "date_dim_2" AS ( - SELECT - "date_dim"."d_date_sk" AS "d_date_sk", - "date_dim"."d_year" AS "d_year", - "date_dim"."d_moy" AS "d_moy" - FROM "date_dim" AS "date_dim" - WHERE - "date_dim"."d_moy" = 11 AND "date_dim"."d_year" = 2001 ), "cte_9" AS ( SELECT 'store' AS "channel", @@ -2273,13 +2269,13 @@ WITH "item_2" AS ( SUM("store_sales"."ss_quantity" * "store_sales"."ss_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "store_sales" AS "store_sales" - LEFT JOIN "_u_0" AS "_u_0" - ON "store_sales"."ss_item_sk" = "_u_0"."ss_item_sk" - CROSS JOIN "_u_1" AS "_u_1" JOIN "item_2" AS "item" ON "store_sales"."ss_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" + LEFT JOIN "_u_0" AS "_u_0" + ON "store_sales"."ss_item_sk" = "_u_0"."ss_item_sk" + CROSS JOIN "_u_1" AS "_u_1" WHERE NOT "_u_0"."ss_item_sk" IS NULL GROUP BY @@ -2297,13 +2293,13 @@ WITH "item_2" AS ( SUM("catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "catalog_sales" AS "catalog_sales" - LEFT JOIN "_u_0" AS "_u_2" - ON "catalog_sales"."cs_item_sk" = "_u_2"."ss_item_sk" - CROSS JOIN "_u_1" AS "_u_3" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON 
"catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" + LEFT JOIN "_u_0" AS "_u_2" + ON "catalog_sales"."cs_item_sk" = "_u_2"."ss_item_sk" + CROSS JOIN "_u_1" AS "_u_3" WHERE NOT "_u_2"."ss_item_sk" IS NULL GROUP BY @@ -2321,13 +2317,13 @@ WITH "item_2" AS ( SUM("web_sales"."ws_quantity" * "web_sales"."ws_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "web_sales" AS "web_sales" - LEFT JOIN "_u_0" AS "_u_4" - ON "web_sales"."ws_item_sk" = "_u_4"."ss_item_sk" - CROSS JOIN "_u_1" AS "_u_5" JOIN "item_2" AS "item" ON "web_sales"."ws_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" + LEFT JOIN "_u_0" AS "_u_4" + ON "web_sales"."ws_item_sk" = "_u_4"."ss_item_sk" + CROSS JOIN "_u_1" AS "_u_5" WHERE NOT "_u_4"."ss_item_sk" IS NULL GROUP BY @@ -2492,10 +2488,6 @@ SELECT SUM("catalog_sales"."cs_ext_ship_cost") AS "total shipping cost", SUM("catalog_sales"."cs_net_profit") AS "total net profit" FROM "catalog_sales" AS "catalog_sales" -LEFT JOIN "_u_0" AS "_u_0" - ON "catalog_sales"."cs_order_number" = "_u_0"."_u_1" -LEFT JOIN "_u_3" AS "_u_3" - ON "catalog_sales"."cs_order_number" = "_u_3"."_u_4" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_ship_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_date" >= '2002-3-01' @@ -2508,6 +2500,10 @@ JOIN "customer_address" AS "customer_address" JOIN "call_center" AS "call_center" ON "call_center"."cc_county" IN ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') AND "catalog_sales"."cs_call_center_sk" = "call_center"."cc_call_center_sk" +LEFT JOIN "_u_0" AS "_u_0" + ON "catalog_sales"."cs_order_number" = "_u_0"."_u_1" +LEFT JOIN "_u_3" AS "_u_3" + ON "catalog_sales"."cs_order_number" = "_u_3"."_u_4" WHERE "_u_3"."_u_4" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "catalog_sales"."cs_warehouse_sk" <> "_x") @@ -3012,13 +3008,21 @@ WITH "frequent_ss_items" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" FROM "store_sales" AS "store_sales" - CROSS JOIN "max_store_sales" JOIN "customer_2" AS "customer" ON "store_sales"."ss_customer_sk" = "customer"."c_customer_sk" + CROSS JOIN "max_store_sales" GROUP BY "customer"."c_customer_sk" HAVING SUM("store_sales"."ss_quantity" * "store_sales"."ss_sales_price") > 0.95 * MAX("max_store_sales"."tpcds_cmax") +), "date_dim_4" AS ( + SELECT + "date_dim"."d_date_sk" AS "d_date_sk", + "date_dim"."d_year" AS "d_year", + "date_dim"."d_moy" AS "d_moy" + FROM "date_dim" AS "date_dim" + WHERE + "date_dim"."d_moy" = 6 AND "date_dim"."d_year" = 1998 ), "_u_1" AS ( SELECT "frequent_ss_items"."item_sk" AS "item_sk" @@ -3031,36 +3035,28 @@ WITH "frequent_ss_items" AS ( FROM "best_ss_customer" GROUP BY "best_ss_customer"."c_customer_sk" -), "date_dim_4" AS ( - SELECT - "date_dim"."d_date_sk" AS "d_date_sk", - "date_dim"."d_year" AS "d_year", - "date_dim"."d_moy" AS "d_moy" - FROM "date_dim" AS "date_dim" - WHERE - "date_dim"."d_moy" = 6 AND "date_dim"."d_year" = 1998 ), "_q_1" AS ( SELECT "catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price" AS "sales" FROM "catalog_sales" AS "catalog_sales" + JOIN "date_dim_4" AS "date_dim" + ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_1" AS "_u_1" ON "catalog_sales"."cs_item_sk" = "_u_1"."item_sk" LEFT JOIN "_u_2" AS "_u_2" ON "catalog_sales"."cs_bill_customer_sk" = "_u_2"."c_customer_sk" - JOIN "date_dim_4" AS "date_dim" - ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" WHERE NOT "_u_1"."item_sk" 
IS NULL AND NOT "_u_2"."c_customer_sk" IS NULL UNION ALL SELECT "web_sales"."ws_quantity" * "web_sales"."ws_list_price" AS "sales" FROM "web_sales" AS "web_sales" + JOIN "date_dim_4" AS "date_dim" + ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_1" AS "_u_3" ON "web_sales"."ws_item_sk" = "_u_3"."item_sk" LEFT JOIN "_u_2" AS "_u_4" ON "web_sales"."ws_bill_customer_sk" = "_u_4"."c_customer_sk" - JOIN "date_dim_4" AS "date_dim" - ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" WHERE NOT "_u_3"."item_sk" IS NULL AND NOT "_u_4"."c_customer_sk" IS NULL ) @@ -3717,13 +3713,13 @@ SELECT "customer"."c_last_review_date" AS "c_last_review_date", "ctr1"."ctr_total_return" AS "ctr_total_return" FROM "customer_total_return" AS "ctr1" -LEFT JOIN "_u_0" AS "_u_0" - ON "ctr1"."ctr_state" = "_u_0"."_u_1" JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" AND "customer_address"."ca_state" = 'IN' +LEFT JOIN "_u_0" AS "_u_0" + ON "ctr1"."ctr_state" = "_u_0"."_u_1" WHERE "ctr1"."ctr_total_return" > "_u_0"."_col_0" ORDER BY @@ -3957,10 +3953,10 @@ SELECT FROM "catalog_sales_2" AS "catalog_sales" JOIN "item" AS "item" ON "item"."i_item_sk" = "catalog_sales"."cs_item_sk" AND "item"."i_manufact_id" = 610 -LEFT JOIN "_u_0" AS "_u_0" - ON "_u_0"."_u_1" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "catalog_sales"."cs_sold_date_sk" +LEFT JOIN "_u_0" AS "_u_0" + ON "_u_0"."_u_1" = "item"."i_item_sk" WHERE "catalog_sales"."cs_ext_discount_amt" > "_u_0"."_col_0" LIMIT 100; @@ -4032,20 +4028,7 @@ FROM (SELECT * GROUP BY i_manufact_id ORDER BY total_sales LIMIT 100; -WITH "item_2" AS ( - SELECT - "item"."i_item_sk" AS "i_item_sk", - "item"."i_manufact_id" AS "i_manufact_id" - FROM "item" AS "item" -), "_u_0" AS ( - SELECT - "item"."i_manufact_id" AS "i_manufact_id" - FROM "item" AS "item" - WHERE - "item"."i_category" IN ('Books') - GROUP BY - "item"."i_manufact_id" -), "date_dim_2" AS ( +WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", @@ -4060,19 +4043,32 @@ WITH "item_2" AS ( FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -5 +), "item_2" AS ( + SELECT + "item"."i_item_sk" AS "i_item_sk", + "item"."i_manufact_id" AS "i_manufact_id" + FROM "item" AS "item" +), "_u_0" AS ( + SELECT + "item"."i_manufact_id" AS "i_manufact_id" + FROM "item" AS "item" + WHERE + "item"."i_category" IN ('Books') + GROUP BY + "item"."i_manufact_id" ), "ss" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" - JOIN "item_2" AS "item" - ON "store_sales"."ss_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_0" - ON "item"."i_manufact_id" = "_u_0"."i_manufact_id" JOIN "date_dim_2" AS "date_dim" ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "store_sales"."ss_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "store_sales"."ss_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_0" + ON "item"."i_manufact_id" = "_u_0"."i_manufact_id" WHERE NOT "_u_0"."i_manufact_id" IS NULL GROUP BY @@ -4082,14 +4078,14 @@ WITH "item_2" AS ( "item"."i_manufact_id" AS "i_manufact_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" 
AS "catalog_sales" - JOIN "item_2" AS "item" - ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_1" - ON "item"."i_manufact_id" = "_u_1"."i_manufact_id" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_1" + ON "item"."i_manufact_id" = "_u_1"."i_manufact_id" WHERE NOT "_u_1"."i_manufact_id" IS NULL GROUP BY @@ -4099,14 +4095,14 @@ WITH "item_2" AS ( "item"."i_manufact_id" AS "i_manufact_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" - JOIN "item_2" AS "item" - ON "web_sales"."ws_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_2" - ON "item"."i_manufact_id" = "_u_2"."i_manufact_id" JOIN "date_dim_2" AS "date_dim" ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "web_sales"."ws_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "web_sales"."ws_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_2" + ON "item"."i_manufact_id" = "_u_2"."i_manufact_id" WHERE NOT "_u_2"."i_manufact_id" IS NULL GROUP BY @@ -4382,16 +4378,16 @@ SELECT AVG("customer_demographics"."cd_dep_college_count") AS "_col_16", MAX("customer_demographics"."cd_dep_college_count") AS "_col_17" FROM "customer" AS "customer" +JOIN "customer_address" AS "customer_address" + ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" +JOIN "customer_demographics" AS "customer_demographics" + ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" LEFT JOIN "_u_0" AS "_u_0" ON "customer"."c_customer_sk" = "_u_0"."_u_1" LEFT JOIN "_u_2" AS "_u_2" ON "customer"."c_customer_sk" = "_u_2"."_u_3" LEFT JOIN "_u_4" AS "_u_4" ON "customer"."c_customer_sk" = "_u_4"."_u_5" -JOIN "customer_address" AS "customer_address" - ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" -JOIN "customer_demographics" AS "customer_demographics" - ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" WHERE NOT "_u_0"."_u_1" IS NULL AND ( @@ -4468,7 +4464,7 @@ ROLLUP ( ) ORDER BY "lochierarchy" DESC, - CASE WHEN "lochierarchy" = 0 THEN "item"."i_category" END, + CASE WHEN "lochierarchy" = 0 THEN "i_category" END, "rank_within_parent" LIMIT 100; @@ -5349,10 +5345,6 @@ SELECT "customer_address"."ca_state" AS "ca_state", SUM("web_sales"."ws_sales_price") AS "_col_2" FROM "web_sales" AS "web_sales" -JOIN "item" AS "item" - ON "web_sales"."ws_item_sk" = "item"."i_item_sk" -LEFT JOIN "_u_0" AS "_u_0" - ON "item"."i_item_id" = "_u_0"."i_item_id" JOIN "customer" AS "customer" ON "web_sales"."ws_bill_customer_sk" = "customer"."c_customer_sk" JOIN "customer_address" AS "customer_address" @@ -5361,6 +5353,10 @@ JOIN "date_dim" AS "date_dim" ON "date_dim"."d_qoy" = 1 AND "date_dim"."d_year" = 2000 AND "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" +JOIN "item" AS "item" + ON "web_sales"."ws_item_sk" = "item"."i_item_sk" +LEFT JOIN "_u_0" AS "_u_0" + ON "item"."i_item_id" = "_u_0"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL OR SUBSTR("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') @@ -6145,7 +6141,7 @@ WITH web_v1 AS ( SELECT ws_item_sk item_sk, d_date, - sum(Sum(ws_sales_price)) 
OVER (partition BY ws_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + sum(Sum(ws_sales_price)) OVER (partition BY ws_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) cume_sales FROM web_sales , date_dim WHERE ws_sold_date_sk=d_date_sk @@ -6156,7 +6152,7 @@ WITH web_v1 AS ( SELECT ss_item_sk item_sk, d_date, - sum(sum(ss_sales_price)) OVER (partition BY ss_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + sum(sum(ss_sales_price)) OVER (partition BY ss_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) cume_sales FROM store_sales , date_dim WHERE ss_sold_date_sk=d_date_sk @@ -6171,8 +6167,8 @@ FROM ( d_date , web_sales , store_sales , - max(web_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative , - max(store_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative + max(web_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) web_cumulative , + max(store_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) store_cumulative FROM ( SELECT CASE @@ -6533,10 +6529,6 @@ WITH "cs_or_ws_sales" AS ( SUM("store_sales"."ss_ext_sales_price") AS "revenue" FROM "my_customers" CROSS JOIN "date_dim" AS "date_dim" - JOIN "_u_0" AS "_u_0" - ON "date_dim"."d_month_seq" >= "_u_0"."_col_0" - JOIN "_u_1" AS "_u_1" - ON "date_dim"."d_month_seq" <= "_u_1"."_col_0" JOIN "store_sales" AS "store_sales" ON "my_customers"."c_customer_sk" = "store_sales"."ss_customer_sk" AND "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" @@ -6545,6 +6537,10 @@ WITH "cs_or_ws_sales" AS ( JOIN "store" AS "store" ON "customer_address"."ca_county" = "store"."s_county" AND "customer_address"."ca_state" = "store"."s_state" + JOIN "_u_0" AS "_u_0" + ON "date_dim"."d_month_seq" >= "_u_0"."_col_0" + JOIN "_u_1" AS "_u_1" + ON "date_dim"."d_month_seq" <= "_u_1"."_col_0" GROUP BY "my_customers"."c_customer_sk" ) @@ -6674,20 +6670,7 @@ FROM (SELECT * GROUP BY i_item_id ORDER BY total_sales LIMIT 100; -WITH "item_2" AS ( - SELECT - "item"."i_item_sk" AS "i_item_sk", - "item"."i_item_id" AS "i_item_id" - FROM "item" AS "item" -), "_u_0" AS ( - SELECT - "item"."i_item_id" AS "i_item_id" - FROM "item" AS "item" - WHERE - "item"."i_color" IN ('firebrick', 'rosy', 'white') - GROUP BY - "item"."i_item_id" -), "date_dim_2" AS ( +WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", @@ -6702,19 +6685,32 @@ WITH "item_2" AS ( FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -6 +), "item_2" AS ( + SELECT + "item"."i_item_sk" AS "i_item_sk", + "item"."i_item_id" AS "i_item_id" + FROM "item" AS "item" +), "_u_0" AS ( + SELECT + "item"."i_item_id" AS "i_item_id" + FROM "item" AS "item" + WHERE + "item"."i_color" IN ('firebrick', 'rosy', 'white') + GROUP BY + "item"."i_item_id" ), "ss" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" - JOIN "item_2" AS "item" - ON "store_sales"."ss_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_0" - ON "item"."i_item_id" = "_u_0"."i_item_id" JOIN "date_dim_2" AS "date_dim" ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON 
"store_sales"."ss_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "store_sales"."ss_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_0" + ON "item"."i_item_id" = "_u_0"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL GROUP BY @@ -6724,14 +6720,14 @@ WITH "item_2" AS ( "item"."i_item_id" AS "i_item_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" AS "catalog_sales" - JOIN "item_2" AS "item" - ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_1" - ON "item"."i_item_id" = "_u_1"."i_item_id" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_1" + ON "item"."i_item_id" = "_u_1"."i_item_id" WHERE NOT "_u_1"."i_item_id" IS NULL GROUP BY @@ -6741,14 +6737,14 @@ WITH "item_2" AS ( "item"."i_item_id" AS "i_item_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" - JOIN "item_2" AS "item" - ON "web_sales"."ws_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_2" - ON "item"."i_item_id" = "_u_2"."i_item_id" JOIN "date_dim_2" AS "date_dim" ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "web_sales"."ws_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "web_sales"."ws_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_2" + ON "item"."i_item_id" = "_u_2"."i_item_id" WHERE NOT "_u_2"."i_item_id" IS NULL GROUP BY @@ -7075,12 +7071,13 @@ WITH "wss" AS ( "wss"."thu_sales" AS "thu_sales2", "wss"."fri_sales" AS "fri_sales2", "wss"."sat_sales" AS "sat_sales2" - FROM "wss", "store" AS "store", "date_dim" AS "date_dim" - WHERE - "date_dim"."d_month_seq" <= 1219 + FROM "wss" + JOIN "store" AS "store" + ON "wss"."ss_store_sk" = "store"."s_store_sk" + JOIN "date_dim" AS "date_dim" + ON "date_dim"."d_month_seq" <= 1219 AND "date_dim"."d_month_seq" >= 1208 AND "date_dim"."d_week_seq" = "wss"."d_week_seq" - AND "wss"."ss_store_sk" = "store"."s_store_sk" ) SELECT "store"."s_store_name" AS "s_store_name1", @@ -7177,20 +7174,7 @@ GROUP BY i_item_id ORDER BY i_item_id, total_sales LIMIT 100; -WITH "item_2" AS ( - SELECT - "item"."i_item_sk" AS "i_item_sk", - "item"."i_item_id" AS "i_item_id" - FROM "item" AS "item" -), "_u_0" AS ( - SELECT - "item"."i_item_id" AS "i_item_id" - FROM "item" AS "item" - WHERE - "item"."i_category" IN ('Jewelry') - GROUP BY - "item"."i_item_id" -), "date_dim_2" AS ( +WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", @@ -7205,19 +7189,32 @@ WITH "item_2" AS ( FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -6 +), "item_2" AS ( + SELECT + "item"."i_item_sk" AS "i_item_sk", + "item"."i_item_id" AS "i_item_id" + FROM "item" AS "item" +), "_u_0" AS ( + SELECT + "item"."i_item_id" AS "i_item_id" + FROM "item" AS "item" + WHERE + "item"."i_category" IN ('Jewelry') + GROUP BY + "item"."i_item_id" ), "ss" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" - JOIN "item_2" AS "item" - ON "store_sales"."ss_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_0" - ON "item"."i_item_id" = "_u_0"."i_item_id" JOIN 
"date_dim_2" AS "date_dim" ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "store_sales"."ss_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "store_sales"."ss_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_0" + ON "item"."i_item_id" = "_u_0"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL GROUP BY @@ -7227,14 +7224,14 @@ WITH "item_2" AS ( "item"."i_item_id" AS "i_item_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" AS "catalog_sales" - JOIN "item_2" AS "item" - ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_1" - ON "item"."i_item_id" = "_u_1"."i_item_id" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_1" + ON "item"."i_item_id" = "_u_1"."i_item_id" WHERE NOT "_u_1"."i_item_id" IS NULL GROUP BY @@ -7244,14 +7241,14 @@ WITH "item_2" AS ( "item"."i_item_id" AS "i_item_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" - JOIN "item_2" AS "item" - ON "web_sales"."ws_item_sk" = "item"."i_item_sk" - LEFT JOIN "_u_0" AS "_u_2" - ON "item"."i_item_id" = "_u_2"."i_item_id" JOIN "date_dim_2" AS "date_dim" ON "web_sales"."ws_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "web_sales"."ws_bill_addr_sk" = "customer_address"."ca_address_sk" + JOIN "item_2" AS "item" + ON "web_sales"."ws_item_sk" = "item"."i_item_sk" + LEFT JOIN "_u_0" AS "_u_2" + ON "item"."i_item_id" = "_u_2"."i_item_id" WHERE NOT "_u_2"."i_item_id" IS NULL GROUP BY @@ -7395,13 +7392,17 @@ WITH "store_2" AS ( ), "all_sales" AS ( SELECT SUM("store_sales"."ss_ext_sales_price") AS "total" - FROM "store_sales" AS "store_sales", "store_2" AS "store", "date_dim_2" AS "date_dim", "customer_2" AS "customer", "customer_address_2" AS "customer_address", "item_2" AS "item" - WHERE - "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" - AND "store_sales"."ss_customer_sk" = "customer"."c_customer_sk" - AND "store_sales"."ss_item_sk" = "item"."i_item_sk" - AND "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales" AS "store_sales" + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" + JOIN "date_dim_2" AS "date_dim" + ON "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" + JOIN "customer_2" AS "customer" + ON "store_sales"."ss_customer_sk" = "customer"."c_customer_sk" + JOIN "customer_address_2" AS "customer_address" + ON "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" + JOIN "item_2" AS "item" + ON "store_sales"."ss_item_sk" = "item"."i_item_sk" ) SELECT "promotional_sales"."promotions" AS "promotions", @@ -9111,17 +9112,17 @@ SELECT "customer_demographics"."cd_credit_rating" AS "cd_credit_rating", COUNT(*) AS "cnt3" FROM "customer" AS "customer" +JOIN "customer_address" AS "customer_address" + ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" + AND "customer_address"."ca_state" IN ('KS', 'AZ', 'NE') +JOIN "customer_demographics" AS "customer_demographics" + ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" LEFT JOIN "_u_0" AS "_u_0" 
ON "customer"."c_customer_sk" = "_u_0"."_u_1" LEFT JOIN "_u_2" AS "_u_2" ON "customer"."c_customer_sk" = "_u_2"."_u_3" LEFT JOIN "_u_4" AS "_u_4" ON "customer"."c_customer_sk" = "_u_4"."_u_5" -JOIN "customer_address" AS "customer_address" - ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" - AND "customer_address"."ca_state" IN ('KS', 'AZ', 'NE') -JOIN "customer_demographics" AS "customer_demographics" - ON "customer_demographics"."cd_demo_sk" = "customer"."c_current_cdemo_sk" WHERE "_u_2"."_u_3" IS NULL AND "_u_4"."_u_5" IS NULL AND NOT "_u_0"."_u_1" IS NULL GROUP BY @@ -9186,7 +9187,7 @@ WITH "store_sales_2" AS ( "store_sales"."ss_store_sk" AS "ss_store_sk", "store_sales"."ss_net_profit" AS "ss_net_profit" FROM "store_sales" AS "store_sales" -), "date_dim_2" AS ( +), "d1" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_month_seq" AS "d_month_seq" @@ -9200,7 +9201,7 @@ WITH "store_sales_2" AS ( FROM "store_sales_2" AS "store_sales" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" - JOIN "date_dim_2" AS "date_dim" + JOIN "d1" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store"."s_state" @@ -9220,12 +9221,12 @@ SELECT GROUPING("store"."s_state") + GROUPING("store"."s_county") AS "lochierarchy", RANK() OVER (PARTITION BY GROUPING("store"."s_state") + GROUPING("store"."s_county"), CASE WHEN GROUPING("store"."s_county") = 0 THEN "store"."s_state" END ORDER BY SUM("store_sales"."ss_net_profit") DESC) AS "rank_within_parent" FROM "store_sales_2" AS "store_sales" +JOIN "d1" AS "d1" + ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" LEFT JOIN "_u_0" AS "_u_0" ON "store"."s_state" = "_u_0"."s_state" -JOIN "date_dim_2" AS "d1" - ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" WHERE NOT "_u_0"."s_state" IS NULL GROUP BY @@ -9235,7 +9236,7 @@ ROLLUP ( ) ORDER BY "lochierarchy" DESC, - CASE WHEN "lochierarchy" = 0 THEN "store"."s_state" END, + CASE WHEN "lochierarchy" = 0 THEN "s_state" END, "rank_within_parent" LIMIT 100; @@ -9303,27 +9304,24 @@ WITH "date_dim_2" AS ( "catalog_sales"."cs_item_sk" AS "sold_item_sk", "catalog_sales"."cs_sold_time_sk" AS "time_sk" FROM "catalog_sales" AS "catalog_sales" - CROSS JOIN "date_dim_2" AS "date_dim" - WHERE - "date_dim"."d_date_sk" = "catalog_sales"."cs_sold_date_sk" + JOIN "date_dim_2" AS "date_dim" + ON "date_dim"."d_date_sk" = "catalog_sales"."cs_sold_date_sk" UNION ALL SELECT "store_sales"."ss_ext_sales_price" AS "ext_price", "store_sales"."ss_item_sk" AS "sold_item_sk", "store_sales"."ss_sold_time_sk" AS "time_sk" FROM "store_sales" AS "store_sales" - CROSS JOIN "date_dim_2" AS "date_dim" - WHERE - "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" + JOIN "date_dim_2" AS "date_dim" + ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" ), "tmp" AS ( SELECT "web_sales"."ws_ext_sales_price" AS "ext_price", "web_sales"."ws_item_sk" AS "sold_item_sk", "web_sales"."ws_sold_time_sk" AS "time_sk" FROM "web_sales" AS "web_sales" - CROSS JOIN "date_dim_2" AS "date_dim" - WHERE - "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" + JOIN "date_dim_2" AS "date_dim" + ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" UNION ALL SELECT "cte_4"."ext_price" AS "ext_price", @@ -10932,13 +10930,13 @@ SELECT "customer_address"."ca_location_type" AS "ca_location_type", "ctr1"."ctr_total_return" AS "ctr_total_return" FROM "customer_total_return" AS "ctr1" -LEFT 
JOIN "_u_0" AS "_u_0" - ON "ctr1"."ctr_state" = "_u_0"."_u_1" JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "customer"."c_current_addr_sk" AND "customer_address"."ca_state" = 'TX' +LEFT JOIN "_u_0" AS "_u_0" + ON "ctr1"."ctr_state" = "_u_0"."_u_1" WHERE "ctr1"."ctr_total_return" > "_u_0"."_col_0" ORDER BY @@ -11089,7 +11087,12 @@ WHERE sr_items.item_id = cr_items.item_id ORDER BY sr_items.item_id, sr_item_qty LIMIT 100; -WITH "date_dim_2" AS ( +WITH "item_2" AS ( + SELECT + "item"."i_item_sk" AS "i_item_sk", + "item"."i_item_id" AS "i_item_id" + FROM "item" AS "item" +), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" @@ -11112,22 +11115,17 @@ WITH "date_dim_2" AS ( NOT "_u_0"."d_week_seq" IS NULL GROUP BY "date_dim"."d_date" -), "item_2" AS ( - SELECT - "item"."i_item_sk" AS "i_item_sk", - "item"."i_item_id" AS "i_item_id" - FROM "item" AS "item" ), "sr_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("store_returns"."sr_return_quantity") AS "sr_item_qty" FROM "store_returns" AS "store_returns" + JOIN "item_2" AS "item" + ON "store_returns"."sr_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "store_returns"."sr_returned_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_1" AS "_u_1" ON "date_dim"."d_date" = "_u_1"."d_date" - JOIN "item_2" AS "item" - ON "store_returns"."sr_item_sk" = "item"."i_item_sk" WHERE NOT "_u_1"."d_date" IS NULL GROUP BY @@ -11147,12 +11145,12 @@ WITH "date_dim_2" AS ( "item"."i_item_id" AS "item_id", SUM("catalog_returns"."cr_return_quantity") AS "cr_item_qty" FROM "catalog_returns" AS "catalog_returns" + JOIN "item_2" AS "item" + ON "catalog_returns"."cr_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_3" AS "_u_3" ON "date_dim"."d_date" = "_u_3"."d_date" - JOIN "item_2" AS "item" - ON "catalog_returns"."cr_item_sk" = "item"."i_item_sk" WHERE NOT "_u_3"."d_date" IS NULL GROUP BY @@ -11172,12 +11170,12 @@ WITH "date_dim_2" AS ( "item"."i_item_id" AS "item_id", SUM("web_returns"."wr_return_quantity") AS "wr_item_qty" FROM "web_returns" AS "web_returns" + JOIN "item_2" AS "item" + ON "web_returns"."wr_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "web_returns"."wr_returned_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_5" AS "_u_5" ON "date_dim"."d_date" = "_u_5"."d_date" - JOIN "item_2" AS "item" - ON "web_returns"."wr_item_sk" = "item"."i_item_sk" WHERE NOT "_u_5"."d_date" IS NULL GROUP BY @@ -11437,7 +11435,7 @@ ROLLUP ( ) ORDER BY "lochierarchy" DESC, - CASE WHEN "lochierarchy" = 0 THEN "item"."i_category" END, + CASE WHEN "lochierarchy" = 0 THEN "i_category" END, "rank_within_parent" LIMIT 100; @@ -11687,73 +11685,87 @@ WITH "store_sales_2" AS ( ), "s2" AS ( SELECT COUNT(*) AS "h9_to_9_30" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON 
"store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 9 AND "time_dim"."t_minute" < 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s3" AS ( SELECT COUNT(*) AS "h9_30_to_10" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 9 AND "time_dim"."t_minute" >= 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s4" AS ( SELECT COUNT(*) AS "h10_to_10_30" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 10 AND "time_dim"."t_minute" < 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s5" AS ( SELECT COUNT(*) AS "h10_30_to_11" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 10 AND "time_dim"."t_minute" >= 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s6" AS ( SELECT COUNT(*) AS "h11_to_11_30" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 11 AND "time_dim"."t_minute" < 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s7" AS ( SELECT COUNT(*) AS "h11_30_to_12" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", 
"time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 11 AND "time_dim"."t_minute" >= 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ), "s8" AS ( SELECT COUNT(*) AS "h12_to_12_30" - FROM "store_sales_2" AS "store_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "store_2" AS "store" - WHERE - "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" - AND "store_sales"."ss_store_sk" = "store"."s_store_sk" + FROM "store_sales_2" AS "store_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 12 AND "time_dim"."t_minute" < 30 + JOIN "store_2" AS "store" + ON "store_sales"."ss_store_sk" = "store"."s_store_sk" ) SELECT "s1"."h8_30_to_9" AS "h8_30_to_9", @@ -11946,13 +11958,15 @@ WITH "web_sales_2" AS ( ), "pt" AS ( SELECT COUNT(*) AS "pmc" - FROM "web_sales_2" AS "web_sales", "household_demographics_2" AS "household_demographics", "time_dim" AS "time_dim", "web_page_2" AS "web_page" - WHERE - "time_dim"."t_hour" <= 21 + FROM "web_sales_2" AS "web_sales" + JOIN "household_demographics_2" AS "household_demographics" + ON "web_sales"."ws_ship_hdemo_sk" = "household_demographics"."hd_demo_sk" + JOIN "time_dim" AS "time_dim" + ON "time_dim"."t_hour" <= 21 AND "time_dim"."t_hour" >= 20 - AND "web_sales"."ws_ship_hdemo_sk" = "household_demographics"."hd_demo_sk" AND "web_sales"."ws_sold_time_sk" = "time_dim"."t_time_sk" - AND "web_sales"."ws_web_page_sk" = "web_page"."wp_web_page_sk" + JOIN "web_page_2" AS "web_page" + ON "web_sales"."ws_web_page_sk" = "web_page"."wp_web_page_sk" ) SELECT CAST("at1"."amc" AS DECIMAL(15, 4)) / CAST("pt"."pmc" AS DECIMAL(15, 4)) AS "am_pm_ratio" @@ -11997,10 +12011,10 @@ GROUP BY cc_call_center_id, cd_education_status ORDER BY Sum(cr_net_loss) DESC; SELECT - "call_center"."cc_call_center_id" AS Call_Center, - "call_center"."cc_name" AS Call_Center_Name, - "call_center"."cc_manager" AS Manager, - SUM("catalog_returns"."cr_net_loss") AS Returns_Loss + "call_center"."cc_call_center_id" AS "call_center", + "call_center"."cc_name" AS "call_center_name", + "call_center"."cc_manager" AS "manager", + SUM("catalog_returns"."cr_net_loss") AS "returns_loss" FROM "call_center" AS "call_center" JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_call_center_sk" = "call_center"."cc_call_center_sk" @@ -12096,10 +12110,10 @@ SELECT FROM "web_sales_2" AS "web_sales" JOIN "item" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" AND "item"."i_manufact_id" = 718 -LEFT JOIN "_u_0" AS "_u_0" - ON "_u_0"."_u_1" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" +LEFT JOIN "_u_0" AS "_u_0" + ON "_u_0"."_u_1" = "item"."i_item_sk" WHERE "web_sales"."ws_ext_discount_amt" > 
"_u_0"."_col_0" ORDER BY @@ -12208,10 +12222,6 @@ SELECT SUM("web_sales"."ws_ext_ship_cost") AS "total shipping cost", SUM("web_sales"."ws_net_profit") AS "total net profit" FROM "web_sales" AS "web_sales" -LEFT JOIN "_u_0" AS "_u_0" - ON "web_sales"."ws_order_number" = "_u_0"."_u_1" -LEFT JOIN "_u_3" AS "_u_3" - ON "web_sales"."ws_order_number" = "_u_3"."_u_4" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date" >= '2000-3-01' AND "web_sales"."ws_ship_date_sk" = "date_dim"."d_date_sk" @@ -12224,6 +12234,10 @@ JOIN "customer_address" AS "customer_address" JOIN "web_site" AS "web_site" ON "web_sales"."ws_web_site_sk" = "web_site"."web_site_sk" AND "web_site"."web_company_name" = 'pri' +LEFT JOIN "_u_0" AS "_u_0" + ON "web_sales"."ws_order_number" = "_u_0"."_u_1" +LEFT JOIN "_u_3" AS "_u_3" + ON "web_sales"."ws_order_number" = "_u_3"."_u_4" WHERE "_u_3"."_u_4" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "web_sales"."ws_warehouse_sk" <> "_x") @@ -12303,10 +12317,6 @@ SELECT SUM("web_sales"."ws_ext_ship_cost") AS "total shipping cost", SUM("web_sales"."ws_net_profit") AS "total net profit" FROM "web_sales" AS "web_sales" -LEFT JOIN "_u_0" AS "_u_0" - ON "web_sales"."ws_order_number" = "_u_0"."ws_order_number" -LEFT JOIN "_u_1" AS "_u_1" - ON "web_sales"."ws_order_number" = "_u_1"."wr_order_number" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date" >= '2000-4-01' AND "web_sales"."ws_ship_date_sk" = "date_dim"."d_date_sk" @@ -12319,6 +12329,10 @@ JOIN "customer_address" AS "customer_address" JOIN "web_site" AS "web_site" ON "web_sales"."ws_web_site_sk" = "web_site"."web_site_sk" AND "web_site"."web_company_name" = 'pri' +LEFT JOIN "_u_0" AS "_u_0" + ON "web_sales"."ws_order_number" = "_u_0"."ws_order_number" +LEFT JOIN "_u_1" AS "_u_1" + ON "web_sales"."ws_order_number" = "_u_1"."wr_order_number" WHERE NOT "_u_0"."ws_order_number" IS NULL AND NOT "_u_1"."wr_order_number" IS NULL ORDER BY diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index ae50f92..a25e247 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -99,19 +99,19 @@ order by p_partkey limit 100; -WITH "partsupp_2" AS ( - SELECT - "partsupp"."ps_partkey" AS "ps_partkey", - "partsupp"."ps_suppkey" AS "ps_suppkey", - "partsupp"."ps_supplycost" AS "ps_supplycost" - FROM "partsupp" AS "partsupp" -), "region_2" AS ( +WITH "region_2" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' +), "partsupp_2" AS ( + SELECT + "partsupp"."ps_partkey" AS "ps_partkey", + "partsupp"."ps_suppkey" AS "ps_suppkey", + "partsupp"."ps_supplycost" AS "ps_supplycost" + FROM "partsupp" AS "partsupp" ), "_u_0" AS ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", @@ -136,8 +136,6 @@ SELECT "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM "part" AS "part" -LEFT JOIN "_u_0" AS "_u_0" - ON "part"."p_partkey" = "_u_0"."_u_1" CROSS JOIN "region_2" AS "region" JOIN "nation" AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" @@ -146,6 +144,8 @@ JOIN "partsupp_2" AS "partsupp" JOIN "supplier" AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" +LEFT JOIN "_u_0" AS "_u_0" + ON "part"."p_partkey" = "_u_0"."_u_1" WHERE "part"."p_size" = 15 AND "part"."p_type" LIKE '%BRASS' @@ -681,11 +681,11 @@ SELECT "partsupp"."ps_partkey" AS "ps_partkey", SUM("partsupp"."ps_supplycost" * 
"partsupp"."ps_availqty") AS "value" FROM "partsupp" AS "partsupp" -CROSS JOIN "_u_0" AS "_u_0" JOIN "supplier_2" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation_2" AS "nation" ON "supplier"."s_nationkey" = "nation"."n_nationkey" +CROSS JOIN "_u_0" AS "_u_0" GROUP BY "partsupp"."ps_partkey" HAVING @@ -950,13 +950,13 @@ SELECT "part"."p_size" AS "p_size", COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt" FROM "partsupp" AS "partsupp" -LEFT JOIN "_u_0" AS "_u_0" - ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey" JOIN "part" AS "part" ON "part"."p_brand" <> 'Brand#45' AND "part"."p_partkey" = "partsupp"."ps_partkey" AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%' +LEFT JOIN "_u_0" AS "_u_0" + ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey" WHERE "_u_0"."s_suppkey" IS NULL GROUP BY @@ -1066,10 +1066,10 @@ SELECT FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" -LEFT JOIN "_u_0" AS "_u_0" - ON "orders"."o_orderkey" = "_u_0"."l_orderkey" JOIN "lineitem" AS "lineitem" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" +LEFT JOIN "_u_0" AS "_u_0" + ON "orders"."o_orderkey" = "_u_0"."l_orderkey" WHERE NOT "_u_0"."l_orderkey" IS NULL GROUP BY @@ -1260,10 +1260,10 @@ SELECT "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address" FROM "supplier" AS "supplier" -LEFT JOIN "_u_4" AS "_u_4" - ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey" JOIN "nation" AS "nation" ON "nation"."n_name" = 'CANADA' AND "supplier"."s_nationkey" = "nation"."n_nationkey" +LEFT JOIN "_u_4" AS "_u_4" + ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey" WHERE NOT "_u_4"."ps_suppkey" IS NULL ORDER BY @@ -1337,15 +1337,15 @@ FROM "supplier" AS "supplier" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_receiptdate" > "lineitem"."l_commitdate" AND "supplier"."s_suppkey" = "lineitem"."l_suppkey" -LEFT JOIN "_u_0" AS "_u_0" - ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey" -LEFT JOIN "_u_2" AS "_u_2" - ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey" JOIN "orders" AS "orders" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" AND "orders"."o_orderstatus" = 'F' JOIN "nation" AS "nation" ON "nation"."n_name" = 'SAUDI ARABIA' AND "supplier"."s_nationkey" = "nation"."n_nationkey" +LEFT JOIN "_u_0" AS "_u_0" + ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey" +LEFT JOIN "_u_2" AS "_u_2" + ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey" WHERE ( "_u_2"."l_orderkey" IS NULL diff --git a/tests/fixtures/partial.sql b/tests/fixtures/partial.sql index c6be364..6c578ab 100644 --- a/tests/fixtures/partial.sql +++ b/tests/fixtures/partial.sql @@ -2,7 +2,7 @@ SELECT a FROM SELECT a FROM x WHERE SELECT a + a * -SELECT a FROM x JOIN +SELECT a FROM x, SELECT a FROM x GROUP BY WITH a AS (SELECT 1), b AS (SELECT 2) SELECT FROM x diff --git a/tests/test_build.py b/tests/test_build.py index 509b857..1e28689 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -54,6 +54,8 @@ class TestBuild(unittest.TestCase): (lambda: 1 >= x, "x <= 1"), (lambda: x.eq(1), "x = 1"), (lambda: x.neq(1), "x <> 1"), + (lambda: x.is_(exp.Null()), "x IS NULL"), + (lambda: x.as_("y"), "x AS y"), (lambda: x.isin(1, "2"), "x IN (1, '2')"), (lambda: x.isin(query="select 1"), "x IN (SELECT 1)"), (lambda: x.between(1, 2), "x BETWEEN 1 AND 2"), @@ -86,13 +88,8 @@ class TestBuild(unittest.TestCase): lambda: select("x").select("y", append=False).from_("tbl"), "SELECT y FROM tbl", ), - (lambda: 
select("x").from_("tbl").from_("tbl2"), "SELECT x FROM tbl, tbl2"), ( - lambda: select("x").from_("tbl, tbl2", "tbl3").from_("tbl4"), - "SELECT x FROM tbl, tbl2, tbl3, tbl4", - ), - ( - lambda: select("x").from_("tbl").from_("tbl2", append=False), + lambda: select("x").from_("tbl").from_("tbl2"), "SELECT x FROM tbl2", ), (lambda: select("SUM(x) AS y"), "SELECT SUM(x) AS y"), @@ -285,10 +282,12 @@ class TestBuild(unittest.TestCase): ( lambda: select("x").from_("tbl").cluster_by("y"), "SELECT x FROM tbl CLUSTER BY y", + "hive", ), ( lambda: select("x").from_("tbl").sort_by("y"), "SELECT x FROM tbl SORT BY y", + "hive", ), ( lambda: select("x").from_("tbl").order_by("x, y DESC"), @@ -297,10 +296,12 @@ class TestBuild(unittest.TestCase): ( lambda: select("x").from_("tbl").cluster_by("x, y DESC"), "SELECT x FROM tbl CLUSTER BY x, y DESC", + "hive", ), ( lambda: select("x").from_("tbl").sort_by("x, y DESC"), "SELECT x FROM tbl SORT BY x, y DESC", + "hive", ), ( lambda: select("x", "y", "z", "a").from_("tbl").order_by("x, y", "z").order_by("a"), @@ -312,10 +313,12 @@ class TestBuild(unittest.TestCase): .cluster_by("x, y", "z") .cluster_by("a"), "SELECT x, y, z, a FROM tbl CLUSTER BY x, y, z, a", + "hive", ), ( lambda: select("x", "y", "z", "a").from_("tbl").sort_by("x, y", "z").sort_by("a"), "SELECT x, y, z, a FROM tbl SORT BY x, y, z, a", + "hive", ), (lambda: select("x").from_("tbl").limit(10), "SELECT x FROM tbl LIMIT 10"), ( @@ -393,7 +396,7 @@ class TestBuild(unittest.TestCase): .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .join("tbl3"), - "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl JOIN tbl3", + "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl, tbl3", ), ( lambda: select("x") @@ -594,6 +597,22 @@ class TestBuild(unittest.TestCase): "postgres", ), ( + lambda: exp.insert("SELECT * FROM tbl2", "tbl"), + "INSERT INTO tbl SELECT * FROM tbl2", + ), + ( + lambda: exp.insert("SELECT * FROM tbl2", "tbl", overwrite=True), + "INSERT OVERWRITE TABLE tbl SELECT * FROM tbl2", + ), + ( + lambda: exp.insert("VALUES (1, 2), (3, 4)", "tbl", columns=["cola", "colb"]), + "INSERT INTO tbl (cola, colb) VALUES (1, 2), (3, 4)", + ), + ( + lambda: exp.insert("SELECT * FROM cte", "t").with_("cte", as_="SELECT x FROM tbl"), + "WITH cte AS (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte", + ), + ( lambda: exp.convert((exp.column("x"), exp.column("y"))).isin((1, 2), (3, 4)), "(x, y) IN ((1, 2), (3, 4))", "postgres", diff --git a/tests/test_executor.py b/tests/test_executor.py index 6bf7d6a..a121dea 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -58,6 +58,7 @@ class TestExecutor(unittest.TestCase): def test_py_dialect(self): self.assertEqual(Python().generate(parse_one("'x '''")), r"'x \''") + self.assertEqual(Python().generate(parse_one("MAP([1], [2])")), "MAP([1], [2])") def test_optimized_tpch(self): for i, (sql, optimized) in enumerate(self.sqls[:20], start=1): @@ -566,6 +567,7 @@ class TestExecutor(unittest.TestCase): ("IF(false, 1, 0)", 0), ("CASE WHEN 0 = 1 THEN 'foo' ELSE 'bar' END", "bar"), ("CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY", date(2022, 1, 2)), + ("INTERVAL '1' week", datetime.timedelta(weeks=1)), ("1 IN (1, 2, 3)", True), ("1 IN (2, 3)", False), ("NULL IS NULL", True), @@ -592,6 +594,14 @@ class TestExecutor(unittest.TestCase): ), ("YEAR(CURRENT_DATE()) = (YEAR(CURRENT_DATE()))", True), ("YEAR(CURRENT_DATE()) <> (YEAR(CURRENT_DATE()))", False), + ("1::bool", True), + ("0::bool", False), + ("MAP(['a'], [1]).a", 1), + ("MAP()", {}), + 
("STRFTIME('%j', '2023-03-23 15:00:00')", "082"), + ("STRFTIME('%j', NULL)", None), + ("DATESTRTODATE('2022-01-01')", date(2022, 1, 1)), + ("TIMESTRTOTIME('2022-01-01')", datetime.datetime(2022, 1, 1)), ]: with self.subTest(sql): result = execute(f"SELECT {sql}") @@ -599,5 +609,27 @@ class TestExecutor(unittest.TestCase): def test_case_sensitivity(self): result = execute("SELECT A AS A FROM X", tables={"x": [{"a": 1}]}) + self.assertEqual(result.columns, ("a",)) + self.assertEqual(result.rows, [(1,)]) + + result = execute('SELECT A AS "A" FROM X', tables={"x": [{"a": 1}]}) self.assertEqual(result.columns, ("A",)) self.assertEqual(result.rows, [(1,)]) + + def test_nested_table_reference(self): + tables = { + "some_catalog": { + "some_schema": { + "some_table": [ + {"id": 1, "price": 1.0}, + {"id": 2, "price": 2.0}, + {"id": 3, "price": 3.0}, + ] + } + } + } + + result = execute("SELECT * FROM some_catalog.some_schema.some_table s", tables=tables) + + self.assertEqual(result.columns, ("id", "price")) + self.assertEqual(result.rows, [(1, 1.0), (2, 2.0), (3, 3.0)]) diff --git a/tests/test_expressions.py b/tests/test_expressions.py index e7588b5..7735e78 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -147,8 +147,8 @@ class TestExpressions(unittest.TestCase): ["a", "B", "e", "*", "zz", "z"], ) self.assertEqual( - [e.alias_or_name for e in expression.args["from"].expressions], - ["bar", "baz"], + {e.alias_or_name for e in expression.find_all(exp.Table)}, + {"bar", "baz"}, ) expression = parse_one( @@ -164,9 +164,10 @@ class TestExpressions(unittest.TestCase): ["first", "second"], ) + self.assertEqual("first", expression.args["from"].alias_or_name) self.assertEqual( - [e.alias_or_name for e in expression.args["from"].expressions], - ["first", "second", "third"], + [e.alias_or_name for e in expression.args["joins"]], + ["second", "third"], ) self.assertEqual(parse_one("x.*").name, "*") @@ -185,10 +186,10 @@ class TestExpressions(unittest.TestCase): def test_replace_tables(self): self.assertEqual( exp.replace_tables( - parse_one("select * from a AS a join b join c.a join d.a join e.a"), + parse_one("select * from a AS a, b, c.a, d.a cross join e.a"), {"a": "a1", "b": "b.a", "c.a": "c.a2", "d.a": "d2"}, ).sql(), - "SELECT * FROM a1 AS a JOIN b.a JOIN c.a2 JOIN d2 JOIN e.a", + "SELECT * FROM a1 AS a, b.a, c.a2, d2 CROSS JOIN e.a", ) def test_replace_placeholders(self): @@ -532,6 +533,7 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("YEAR(a)"), exp.Year) self.assertIsInstance(parse_one("HLL(a)"), exp.Hll) self.assertIsInstance(parse_one("ARRAY(time, foo)"), exp.Array) + self.assertIsInstance(parse_one("STANDARD_HASH('hello', 'sha256')"), exp.StandardHash) def test_column(self): column = parse_one("a.b.c.d") @@ -610,7 +612,6 @@ class TestExpressions(unittest.TestCase): "FORMAT": "parquet", "PARTITIONED_BY": (exp.to_identifier("a"), exp.to_identifier("b")), "custom": 1, - "TABLE_FORMAT": exp.to_identifier("test_format"), "ENGINE": None, "COLLATE": True, } @@ -622,7 +623,6 @@ class TestExpressions(unittest.TestCase): this=exp.Tuple(expressions=[exp.to_identifier("a"), exp.to_identifier("b")]) ), exp.Property(this=exp.Literal.string("custom"), value=exp.Literal.number(1)), - exp.TableFormatProperty(this=exp.to_identifier("test_format")), exp.EngineProperty(this=exp.null()), exp.CollateProperty(this=exp.true()), ] @@ -726,10 +726,6 @@ FROM foo""", self.assertEqual(catalog_db_and_table.args.get("catalog"), exp.to_identifier("catalog")) with 
self.assertRaises(ValueError): exp.to_table(1) - empty_string = exp.to_table("") - self.assertEqual(empty_string.name, "") - self.assertIsNone(table_only.args.get("db")) - self.assertIsNone(table_only.args.get("catalog")) def test_to_column(self): column_only = exp.to_column("column_name") diff --git a/tests/test_helper.py b/tests/test_helper.py index d37c03a..82d917e 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -1,6 +1,7 @@ import unittest -from sqlglot.helper import tsort +from sqlglot.dialects import BigQuery, Dialect, Snowflake +from sqlglot.helper import name_sequence, tsort class TestHelper(unittest.TestCase): @@ -29,3 +30,40 @@ class TestHelper(unittest.TestCase): "c": [], } ) + + def test_compare_dialects(self): + bigquery_class = Dialect["bigquery"] + bigquery_object = BigQuery() + bigquery_string = "bigquery" + + snowflake_class = Dialect["snowflake"] + snowflake_object = Snowflake() + snowflake_string = "snowflake" + + self.assertEqual(snowflake_class, snowflake_class) + self.assertEqual(snowflake_class, snowflake_object) + self.assertEqual(snowflake_class, snowflake_string) + self.assertEqual(snowflake_object, snowflake_object) + self.assertEqual(snowflake_object, snowflake_string) + + self.assertNotEqual(snowflake_class, bigquery_class) + self.assertNotEqual(snowflake_class, bigquery_object) + self.assertNotEqual(snowflake_class, bigquery_string) + self.assertNotEqual(snowflake_object, bigquery_object) + self.assertNotEqual(snowflake_object, bigquery_string) + + self.assertTrue(snowflake_class in {"snowflake", "bigquery"}) + self.assertTrue(snowflake_object in {"snowflake", "bigquery"}) + self.assertFalse(snowflake_class in {"bigquery", "redshift"}) + self.assertFalse(snowflake_object in {"bigquery", "redshift"}) + + def test_name_sequence(self): + s1 = name_sequence("a") + s2 = name_sequence("b") + + self.assertEqual(s1(), "a0") + self.assertEqual(s1(), "a1") + self.assertEqual(s2(), "b0") + self.assertEqual(s1(), "a2") + self.assertEqual(s2(), "b1") + self.assertEqual(s2(), "b2") diff --git a/tests/test_lineage.py b/tests/test_lineage.py index 1d13dd3..f33a2c2 100644 --- a/tests/test_lineage.py +++ b/tests/test_lineage.py @@ -2,12 +2,18 @@ from __future__ import annotations import unittest +import sqlglot from sqlglot.lineage import lineage +from sqlglot.schema import MappingSchema class TestLineage(unittest.TestCase): maxDiff = None + @classmethod + def setUpClass(cls): + sqlglot.schema = MappingSchema() + def test_lineage(self) -> None: node = lineage( "a", @@ -140,17 +146,43 @@ class TestLineage(unittest.TestCase): self.assertEqual(node.alias, "") downstream = node.downstream[0] - self.assertEqual( - downstream.source.sql(), - "SELECT t.a AS a FROM (VALUES (1), (2)) AS t(a)", - ) + self.assertEqual(downstream.source.sql(), "SELECT t.a AS a FROM (VALUES (1), (2)) AS t(a)") self.assertEqual(downstream.expression.sql(), "t.a AS a") self.assertEqual(downstream.alias, "y") downstream = downstream.downstream[0] + self.assertEqual(downstream.source.sql(), "(VALUES (1), (2)) AS t(a)") + self.assertEqual(downstream.expression.sql(), "a") + self.assertEqual(downstream.alias, "") + + def test_lineage_cte_name_appears_in_schema(self) -> None: + schema = {"a": {"b": {"t1": {"c1": "int"}, "t2": {"c2": "int"}}}} + + node = lineage( + "c2", + "WITH t1 AS (SELECT * FROM a.b.t2), inter AS (SELECT * FROM t1) SELECT * FROM inter", + schema=schema, + ) + self.assertEqual( - downstream.source.sql(), - "(VALUES (1), (2)) AS t(a)", + node.source.sql(), + "WITH t1 AS (SELECT 
t2.c2 AS c2 FROM a.b.t2 AS t2), inter AS (SELECT t1.c2 AS c2 FROM t1) SELECT inter.c2 AS c2 FROM inter", ) - self.assertEqual(downstream.expression.sql(), "a") + self.assertEqual(node.alias, "") + + downstream = node.downstream[0] + self.assertEqual(downstream.source.sql(), "SELECT t1.c2 AS c2 FROM t1") + self.assertEqual(downstream.expression.sql(), "t1.c2 AS c2") + self.assertEqual(downstream.alias, "") + + downstream = downstream.downstream[0] + self.assertEqual(downstream.source.sql(), "SELECT t2.c2 AS c2 FROM a.b.t2 AS t2") + self.assertEqual(downstream.expression.sql(), "t2.c2 AS c2") + self.assertEqual(downstream.alias, "") + + downstream = downstream.downstream[0] + self.assertEqual(downstream.source.sql(), "a.b.t2 AS t2") + self.assertEqual(downstream.expression.sql(), "a.b.t2 AS t2") self.assertEqual(downstream.alias, "") + + self.assertEqual(downstream.downstream, []) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 423cb84..2ae6da9 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -20,19 +20,20 @@ from tests.helpers import ( ) -def parse_and_optimize(func, sql, dialect, **kwargs): - return func(parse_one(sql, read=dialect), **kwargs) +def parse_and_optimize(func, sql, read_dialect, **kwargs): + return func(parse_one(sql, read=read_dialect), **kwargs) def qualify_columns(expression, **kwargs): - expression = optimizer.qualify_tables.qualify_tables(expression) - expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs) + expression = optimizer.qualify.qualify( + expression, infer_schema=True, validate_qualify_columns=False, identify=False, **kwargs + ) return expression def pushdown_projections(expression, **kwargs): expression = optimizer.qualify_tables.qualify_tables(expression) - expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs) + expression = optimizer.qualify_columns.qualify_columns(expression, infer_schema=True, **kwargs) expression = optimizer.pushdown_projections.pushdown_projections(expression, **kwargs) return expression @@ -98,7 +99,7 @@ class TestOptimizer(unittest.TestCase): }, } - def check_file(self, file, func, pretty=False, execute=False, **kwargs): + def check_file(self, file, func, pretty=False, execute=False, set_dialect=False, **kwargs): with ProcessPoolExecutor() as pool: results = {} @@ -113,6 +114,9 @@ class TestOptimizer(unittest.TestCase): if leave_tables_isolated is not None: func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated) + if set_dialect and dialect: + func_kwargs["dialect"] = dialect + future = pool.submit(parse_and_optimize, func, sql, dialect, **func_kwargs) results[future] = ( sql, @@ -141,13 +145,24 @@ class TestOptimizer(unittest.TestCase): assert_frame_equal(df1, df2) def test_optimize(self): + self.assertEqual(optimizer.optimize("x = 1 + 1", identify=None).sql(), "x = 2") + schema = { "x": {"a": "INT", "b": "INT"}, "y": {"b": "INT", "c": "INT"}, "z": {"a": "INT", "c": "INT"}, + "u": {"f": "INT", "g": "INT", "h": "TEXT"}, } - self.check_file("optimizer", optimizer.optimize, pretty=True, execute=True, schema=schema) + self.check_file( + "optimizer", + optimizer.optimize, + infer_schema=True, + pretty=True, + execute=True, + schema=schema, + set_dialect=True, + ) def test_isolate_table_selects(self): self.check_file( @@ -183,6 +198,15 @@ class TestOptimizer(unittest.TestCase): self.check_file("normalize", normalize) def test_qualify_columns(self): + self.assertEqual( + optimizer.qualify_columns.qualify_columns( + parse_one("select y 
from x"), + schema={}, + infer_schema=False, + ).sql(), + "SELECT y AS y FROM x", + ) + self.check_file("qualify_columns", qualify_columns, execute=True, schema=self.schema) def test_qualify_columns__with_invisible(self): @@ -198,8 +222,12 @@ class TestOptimizer(unittest.TestCase): ) optimizer.qualify_columns.validate_qualify_columns(expression) - def test_lower_identities(self): - self.check_file("lower_identities", optimizer.lower_identities.lower_identities) + def test_normalize_identifiers(self): + self.check_file( + "normalize_identifiers", + optimizer.normalize_identifiers.normalize_identifiers, + set_dialect=True, + ) def test_pushdown_projection(self): self.check_file("pushdown_projections", pushdown_projections, schema=self.schema) @@ -221,24 +249,20 @@ class TestOptimizer(unittest.TestCase): def test_pushdown_predicates(self): self.check_file("pushdown_predicates", optimizer.pushdown_predicates.pushdown_predicates) - def test_expand_laterals(self): + def test_expand_alias_refs(self): # check order of lateral expansion with no schema self.assertEqual( - optimizer.optimize("SELECT a + 1 AS d, d + 1 AS e FROM x " "").sql(), - 'SELECT "x"."a" + 1 AS "d", "x"."a" + 2 AS "e" FROM "x" AS "x"', - ) - - self.check_file( - "expand_laterals", - optimizer.expand_laterals.expand_laterals, - pretty=True, - execute=True, + optimizer.optimize("SELECT a + 1 AS d, d + 1 AS e FROM x WHERE e > 1 GROUP BY e").sql(), + 'SELECT "x"."a" + 1 AS "d", "x"."a" + 2 AS "e" FROM "x" AS "x" WHERE "x"."a" + 2 > 1 GROUP BY "x"."a" + 2', ) - def test_expand_multi_table_selects(self): - self.check_file( - "expand_multi_table_selects", - optimizer.expand_multi_table_selects.expand_multi_table_selects, + self.assertEqual( + optimizer.qualify_columns.qualify_columns( + parse_one("SELECT CAST(x AS INT) AS y FROM z AS z"), + schema={"l": {"c": "int"}}, + infer_schema=False, + ).sql(), + "SELECT CAST(x AS INT) AS y FROM z AS z", ) def test_optimize_joins(self): @@ -280,8 +304,8 @@ class TestOptimizer(unittest.TestCase): optimize = partial( optimizer.optimize, rules=[ - optimizer.qualify_tables.qualify_tables, - optimizer.qualify_columns.qualify_columns, + optimizer.qualify.qualify, + optimizer.qualify_columns.quote_identifiers, annotate_types, optimizer.canonicalize.canonicalize, ], @@ -396,7 +420,7 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual(expression.type.this, exp.DataType.Type.TIMESTAMPTZ) self.assertEqual(expression.this.type.this, exp.DataType.Type.VARCHAR) self.assertEqual(expression.args["to"].type.this, exp.DataType.Type.TIMESTAMPTZ) - self.assertEqual(expression.args["to"].expressions[0].type.this, exp.DataType.Type.INT) + self.assertEqual(expression.args["to"].expressions[0].this.type.this, exp.DataType.Type.INT) expression = annotate_types(parse_one("ARRAY(1)::ARRAY<INT>")) self.assertEqual(expression.type, parse_one("ARRAY<INT>", into=exp.DataType)) @@ -450,7 +474,7 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') expression.expressions[0].type.this, exp.DataType.Type.FLOAT ) # a.cola AS cola - addition_alias = expression.args["from"].expressions[0].this.expressions[0] + addition_alias = expression.args["from"].this.this.expressions[0] self.assertEqual( addition_alias.type.this, exp.DataType.Type.FLOAT ) # x.cola + y.cola AS cola @@ -663,3 +687,30 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') optimizer.optimize(parse_one("SELECT * FROM a"), schema=schema), parse_one('SELECT "a"."b 
c" AS "b c", "a"."d e" AS "d e" FROM "a" AS "a"'), ) + + def test_quotes(self): + schema = { + "example": { + '"source"': { + "id": "text", + '"name"': "text", + '"payload"': "text", + } + } + } + + expected = parse_one( + """ + SELECT + "source"."ID" AS "ID", + "source"."name" AS "name", + "source"."payload" AS "payload" + FROM "EXAMPLE"."source" AS "source" + """, + read="snowflake", + ).sql(pretty=True, dialect="snowflake") + + for func in (optimizer.qualify.qualify, optimizer.optimize): + source_query = parse_one('SELECT * FROM example."source"', read="snowflake") + transformed = func(source_query, dialect="snowflake", schema=schema) + self.assertEqual(transformed.sql(pretty=True, dialect="snowflake"), expected) diff --git a/tests/test_parser.py b/tests/test_parser.py index 816471e..84ae0b5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -15,14 +15,23 @@ class TestParser(unittest.TestCase): self.assertIsInstance(parse_one("left join foo", into=exp.Join), exp.Join) self.assertIsInstance(parse_one("int", into=exp.DataType), exp.DataType) self.assertIsInstance(parse_one("array<int>", into=exp.DataType), exp.DataType) + self.assertIsInstance(parse_one("foo", into=exp.Table), exp.Table) + + with self.assertRaises(ParseError) as ctx: + parse_one("SELECT * FROM tbl", into=exp.Table) + + self.assertEqual( + str(ctx.exception), + "Failed to parse 'SELECT * FROM tbl' into <class 'sqlglot.expressions.Table'>", + ) def test_parse_into_error(self): - expected_message = "Failed to parse into [<class 'sqlglot.expressions.From'>]" + expected_message = "Failed to parse 'SELECT 1;' into [<class 'sqlglot.expressions.From'>]" expected_errors = [ { "description": "Invalid expression / Unexpected token", "line": 1, - "col": 7, + "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", @@ -30,17 +39,18 @@ class TestParser(unittest.TestCase): } ] with self.assertRaises(ParseError) as ctx: - parse_one("SELECT 1;", "sqlite", [exp.From]) + parse_one("SELECT 1;", read="sqlite", into=[exp.From]) + self.assertEqual(str(ctx.exception), expected_message) self.assertEqual(ctx.exception.errors, expected_errors) def test_parse_into_errors(self): - expected_message = "Failed to parse into [<class 'sqlglot.expressions.From'>, <class 'sqlglot.expressions.Join'>]" + expected_message = "Failed to parse 'SELECT 1;' into [<class 'sqlglot.expressions.From'>, <class 'sqlglot.expressions.Join'>]" expected_errors = [ { "description": "Invalid expression / Unexpected token", "line": 1, - "col": 7, + "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", @@ -49,7 +59,7 @@ class TestParser(unittest.TestCase): { "description": "Invalid expression / Unexpected token", "line": 1, - "col": 7, + "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", @@ -58,6 +68,7 @@ class TestParser(unittest.TestCase): ] with self.assertRaises(ParseError) as ctx: parse_one("SELECT 1;", "sqlite", [exp.From, exp.Join]) + self.assertEqual(str(ctx.exception), expected_message) self.assertEqual(ctx.exception.errors, expected_errors) @@ -75,7 +86,7 @@ class TestParser(unittest.TestCase): def test_table(self): tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)] - self.assertEqual(tables, ["a", "b.c", "d"]) + self.assertEqual(set(tables), {"a", "b.c", "d"}) def test_union_order(self): self.assertIsInstance(parse_one("SELECT * FROM (SELECT 1) UNION SELECT 2"), exp.Union) @@ -92,7 +103,7 @@ class TestParser(unittest.TestCase): 
         self.assertEqual(len(parse_one("select * from (select 1) x cross join y").args["joins"]), 1)
         self.assertEqual(
             parse_one("""SELECT * FROM x CROSS JOIN y, z LATERAL VIEW EXPLODE(y)""").sql(),
-            """SELECT * FROM x, z CROSS JOIN y LATERAL VIEW EXPLODE(y)""",
+            """SELECT * FROM x CROSS JOIN y, z LATERAL VIEW EXPLODE(y)""",
         )
         self.assertIsNone(
             parse_one("create table a as (select b from c) index").find(exp.TableAlias)
@@ -156,8 +167,8 @@ class TestParser(unittest.TestCase):
         assert expression.expressions[2].alias == "c"
         assert expression.expressions[3].alias == "D"
         assert expression.expressions[4].alias == "y|z'"
-        table = expression.args["from"].expressions[0]
-        assert table.this.name == "z"
+        table = expression.args["from"].this
+        assert table.name == "z"
         assert table.args["db"].name == "y"
 
     def test_multi(self):
@@ -168,8 +179,8 @@ class TestParser(unittest.TestCase):
         )
 
         assert len(expressions) == 2
-        assert expressions[0].args["from"].expressions[0].this.name == "a"
-        assert expressions[1].args["from"].expressions[0].this.name == "b"
+        assert expressions[0].args["from"].name == "a"
+        assert expressions[1].args["from"].name == "b"
 
         expressions = parse("SELECT 1; ; SELECT 2")
 
@@ -202,6 +213,15 @@ class TestParser(unittest.TestCase):
         with self.assertRaises(ParseError):
             parse_one("WITH cte AS (SELECT * FROM x)")
 
+        self.assertEqual(
+            parse_one(
+                "CREATE TABLE t (i UInt8) ENGINE = AggregatingMergeTree() ORDER BY tuple()",
+                read="clickhouse",
+                error_level=ErrorLevel.RAISE,
+            ).sql(dialect="clickhouse"),
+            "CREATE TABLE t (i UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()",
+        )
+
     def test_space(self):
         self.assertEqual(
             parse_one("SELECT ROW() OVER(PARTITION BY x) FROM x GROUP BY y").sql(),
@@ -292,6 +312,7 @@ class TestParser(unittest.TestCase):
         self.assertIsInstance(parse_one("TIMESTAMP('2022-01-01')"), exp.Func)
         self.assertIsInstance(parse_one("TIMESTAMP()"), exp.Func)
         self.assertIsInstance(parse_one("map.x"), exp.Column)
+        self.assertIsInstance(parse_one("CAST(x AS CHAR(5))").to.expressions[0], exp.DataTypeSize)
 
     def test_set_expression(self):
         set_ = parse_one("SET")
@@ -415,39 +436,56 @@ class TestParser(unittest.TestCase):
         ) PIVOT (AVG(price), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder'))
         """
 
-        multiple_aggregates_not_aliased_with_quoted_identifier = """
+        multiple_aggregates_not_aliased_with_quoted_identifier_spark = """
             SELECT * FROM (
               SELECT partname, price, quality FROM part
             ) PIVOT (AVG(`PrIcE`), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder'))
         """
 
+        multiple_aggregates_not_aliased_with_quoted_identifier_duckdb = """
+            SELECT * FROM (
+              SELECT partname, price, quality FROM part
+            ) PIVOT (AVG("PrIcE"), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder'))
+        """
+
         query_to_column_names = {
             nothing_aliased: {
                 "bigquery": ["prop", "rudder"],
+                "duckdb": ["prop", "rudder"],
                 "redshift": ["prop", "rudder"],
-                "snowflake": ['"prop"', '"rudder"'],
+                "snowflake": ['''"'prop'"''', '''"'rudder'"'''],
                 "spark": ["prop", "rudder"],
             },
             everything_aliased: {
                 "bigquery": ["avg_price_prop1", "avg_price_rudder1"],
+                "duckdb": ["prop1_avg_price", "rudder1_avg_price"],
                 "redshift": ["prop1_avg_price", "rudder1_avg_price"],
                 "spark": ["prop1", "rudder1"],
             },
             only_pivot_columns_aliased: {
                 "bigquery": ["prop1", "rudder1"],
+                "duckdb": ["prop1", "rudder1"],
                 "redshift": ["prop1", "rudder1"],
                 "spark": ["prop1", "rudder1"],
            },
             columns_partially_aliased: {
                 "bigquery": ["prop1", "rudder"],
+                "duckdb": ["prop1", "rudder"],
                 "redshift": ["prop1", "rudder"],
                 "spark": ["prop1", "rudder"],
             },
             multiple_aggregates_aliased: {
                 "bigquery": ["p_prop1", "q_prop1", "p_rudder", "q_rudder"],
+                "duckdb": ["prop1_p", "prop1_q", "rudder_p", "rudder_q"],
                 "spark": ["prop1_p", "prop1_q", "rudder_p", "rudder_q"],
             },
             multiple_aggregates_not_aliased: {
+                "duckdb": [
+                    '"prop1_avg(price)"',
+                    '"prop1_max(quality)"',
+                    '"rudder_avg(price)"',
+                    '"rudder_max(quality)"',
+                ],
                 "spark": [
                     "`prop1_avg(price)`",
                     "`prop1_max(quality)`",
@@ -455,7 +493,7 @@ class TestParser(unittest.TestCase):
                     "`rudder_avg(price)`",
                     "`rudder_max(quality)`",
                 ],
             },
-            multiple_aggregates_not_aliased_with_quoted_identifier: {
+            multiple_aggregates_not_aliased_with_quoted_identifier_spark: {
                 "spark": [
                     "`prop1_avg(PrIcE)`",
                     "`prop1_max(quality)`",
@@ -463,10 +501,23 @@ class TestParser(unittest.TestCase):
                     "`rudder_avg(PrIcE)`",
                     "`rudder_max(quality)`",
                 ],
             },
+            multiple_aggregates_not_aliased_with_quoted_identifier_duckdb: {
+                "duckdb": [
+                    '"prop1_avg(PrIcE)"',
+                    '"prop1_max(quality)"',
+                    '"rudder_avg(PrIcE)"',
+                    '"rudder_max(quality)"',
+                ],
+            },
         }
 
         for query, dialect_columns in query_to_column_names.items():
             for dialect, expected_columns in dialect_columns.items():
                 expr = parse_one(query, read=dialect)
-                columns = expr.args["from"].expressions[0].args["pivots"][0].args["columns"]
+                columns = expr.args["from"].this.args["pivots"][0].args["columns"]
                 self.assertEqual(expected_columns, [col.sql(dialect=dialect) for col in columns])
+
+    def test_parse_properties(self):
+        self.assertEqual(
+            parse_one("create materialized table x").sql(), "CREATE MATERIALIZED TABLE x"
+        )
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 92cf04a..b03e7e7 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -210,14 +210,22 @@ class TestSchema(unittest.TestCase):
         self.assertEqual(schema.column_names(table_z), ["a", "B"])
         self.assertEqual(schema.column_names(table_w), ["c"])
 
-        # Clickhouse supports both `` and "" for identifier quotes; sqlglot uses "" when generating sql
         schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse")
         self.assertEqual(schema.column_names(exp.Table(this="x")), ["y"])
 
-        # Check that add_table normalizes both the table and the column names to be added/updated
+        # Check that add_table normalizes both the table and the column names to be added / updated
         schema = MappingSchema()
         schema.add_table("Foo", {"SomeColumn": "INT", '"SomeColumn"': "DOUBLE"})
+        self.assertEqual(schema.column_names(exp.Table(this="fOO")), ["somecolumn", "SomeColumn"])
 
-        table_foo = exp.Table(this="fOO")
+        # Check that names are normalized to uppercase for Snowflake
+        schema = MappingSchema(schema={"x": {"foo": "int", '"bLa"': "int"}}, dialect="snowflake")
+        self.assertEqual(schema.column_names(exp.Table(this="x")), ["FOO", "bLa"])
 
-        self.assertEqual(schema.column_names(table_foo), ["somecolumn", "SomeColumn"])
+        # Check that switching off the normalization logic works as expected
+        schema = MappingSchema(schema={"x": {"foo": "int"}}, normalize=False, dialect="snowflake")
+        self.assertEqual(schema.column_names(exp.Table(this="x")), ["foo"])
+
+        # Check that the correct dialect is used when calling schema methods
+        schema = MappingSchema(schema={"[Fo]": {"x": "int"}}, dialect="tsql")
+        self.assertEqual(schema.column_names("[Fo]"), schema.column_names("`Fo`", dialect="spark"))
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index f70d70e..30af34f 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -20,7 +20,7 @@ class TestTokens(unittest.TestCase):
         for sql, comment in sql_comment:
             self.assertEqual(tokenizer.tokenize(sql)[0].comments, comment)
 
-    def test_token_line(self):
+    def test_token_line_col(self):
        tokens = Tokenizer().tokenize(
             """SELECT /*
 line break
@@ -30,10 +30,23 @@ line break
 x"""
         )
 
+        self.assertEqual(tokens[0].line, 1)
+        self.assertEqual(tokens[0].col, 6)
         self.assertEqual(tokens[1].line, 5)
         self.assertEqual(tokens[1].col, 3)
-        self.assertEqual(tokens[-1].line, 6)
-        self.assertEqual(tokens[-1].col, 1)
+        self.assertEqual(tokens[2].line, 5)
+        self.assertEqual(tokens[2].col, 4)
+        self.assertEqual(tokens[3].line, 6)
+        self.assertEqual(tokens[3].col, 1)
+
+        tokens = Tokenizer().tokenize("SELECT .")
+
+        self.assertEqual(tokens[1].line, 1)
+        self.assertEqual(tokens[1].col, 8)
+
+        self.assertEqual(Tokenizer().tokenize("'''abc'")[0].start, 0)
+        self.assertEqual(Tokenizer().tokenize("'''abc'")[0].end, 6)
+        self.assertEqual(Tokenizer().tokenize("'abc'")[0].start, 0)
 
     def test_command(self):
         tokens = Tokenizer().tokenize("SHOW;")
@@ -51,7 +64,7 @@ x"""
         self.assertEqual(tokens[3].token_type, TokenType.SEMICOLON)
 
     def test_error_msg(self):
-        with self.assertRaisesRegex(ValueError, "Error tokenizing 'select.*"):
+        with self.assertRaisesRegex(ValueError, "Error tokenizing 'select /'"):
             Tokenizer().tokenize("select /*")
 
     def test_jinja(self):
diff --git a/tests/test_transpile.py b/tests/test_transpile.py
index ad8ec72..1085b09 100644
--- a/tests/test_transpile.py
+++ b/tests/test_transpile.py
@@ -224,7 +224,10 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
         )
         self.validate(
             """
-            select a from b
+            select a
+            -- from
+            from b
+            -- where
             where foo
             -- comment 1
             and bar
@@ -233,7 +236,9 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
             """,
             """SELECT
   a
+/* from */
 FROM b
+/* where */
 WHERE
   foo /* comment 1 */ AND bar AND bla /* comment 2 */""",
             pretty=True,
@@ -550,14 +555,14 @@ FROM v""",
     def test_error_level(self, logger):
         invalid = "x + 1. ("
         expected_messages = [
-            "Required keyword: 'expressions' missing for <class 'sqlglot.expressions.Aliases'>. Line 1, Col: 9.\n  x + 1. \033[4m(\033[0m",
-            "Expecting ). Line 1, Col: 9.\n  x + 1. \033[4m(\033[0m",
+            "Required keyword: 'expressions' missing for <class 'sqlglot.expressions.Aliases'>. Line 1, Col: 8.\n  x + 1. \033[4m(\033[0m",
+            "Expecting ). Line 1, Col: 8.\n  x + 1. \033[4m(\033[0m",
         ]
         expected_errors = [
             {
                 "description": "Required keyword: 'expressions' missing for <class 'sqlglot.expressions.Aliases'>",
                 "line": 1,
-                "col": 9,
+                "col": 8,
                 "start_context": "x + 1. ",
                 "highlight": "(",
                 "end_context": "",
@@ -566,7 +571,7 @@ FROM v""",
             {
                 "description": "Expecting )",
                 "line": 1,
-                "col": 9,
+                "col": 8,
                 "start_context": "x + 1. ",
                 "highlight": "(",
                 "end_context": "",
@@ -580,26 +585,28 @@ FROM v""",
 
         with self.assertRaises(ParseError) as ctx:
             transpile(invalid, error_level=ErrorLevel.IMMEDIATE)
+
         self.assertEqual(str(ctx.exception), expected_messages[0])
         self.assertEqual(ctx.exception.errors[0], expected_errors[0])
 
         with self.assertRaises(ParseError) as ctx:
             transpile(invalid, error_level=ErrorLevel.RAISE)
+
         self.assertEqual(str(ctx.exception), "\n\n".join(expected_messages))
         self.assertEqual(ctx.exception.errors, expected_errors)
 
         more_than_max_errors = "(((("
         expected_messages = (
-            "Required keyword: 'this' missing for <class 'sqlglot.expressions.Paren'>. Line 1, Col: 5.\n  (((\033[4m(\033[0m\n\n"
-            "Expecting ). Line 1, Col: 5.\n  (((\033[4m(\033[0m\n\n"
-            "Expecting ). Line 1, Col: 5.\n  (((\033[4m(\033[0m\n\n"
+            "Required keyword: 'this' missing for <class 'sqlglot.expressions.Paren'>. Line 1, Col: 4.\n  (((\033[4m(\033[0m\n\n"
+            "Expecting ). Line 1, Col: 4.\n  (((\033[4m(\033[0m\n\n"
+            "Expecting ). Line 1, Col: 4.\n  (((\033[4m(\033[0m\n\n"
             "... and 2 more"
         )
         expected_errors = [
             {
                 "description": "Required keyword: 'this' missing for <class 'sqlglot.expressions.Paren'>",
                 "line": 1,
-                "col": 5,
+                "col": 4,
                 "start_context": "(((",
                 "highlight": "(",
                 "end_context": "",
@@ -608,7 +615,7 @@ FROM v""",
             {
                 "description": "Expecting )",
                 "line": 1,
-                "col": 5,
+                "col": 4,
                 "start_context": "(((",
                 "highlight": "(",
                 "end_context": "",
@@ -620,6 +627,7 @@ FROM v""",
 
         with self.assertRaises(ParseError) as ctx:
             transpile(more_than_max_errors, error_level=ErrorLevel.RAISE)
+
         self.assertEqual(str(ctx.exception), expected_messages)
         self.assertEqual(ctx.exception.errors, expected_errors)