From f10d022e11dcd1015db1a74ce9f4198ebdcb7f40 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 16 Oct 2023 13:37:39 +0200 Subject: Merging upstream version 18.13.0. Signed-off-by: Daniel Baumann --- tests/dialects/test_duckdb.py | 8 +++ tests/dialects/test_mysql.py | 3 + tests/dialects/test_oracle.py | 15 +++- tests/dialects/test_redshift.py | 12 ++++ tests/dialects/test_spark.py | 11 +++ tests/dialects/test_teradata.py | 8 +++ tests/dialects/test_tsql.py | 6 +- tests/fixtures/optimizer/optimizer.sql | 29 ++++++++ tests/fixtures/optimizer/pushdown_projections.sql | 9 +++ tests/fixtures/optimizer/simplify.sql | 86 ++++++++++++++++++++++- tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 39 ++++++---- tests/test_executor.py | 6 ++ tests/test_optimizer.py | 11 ++- tests/test_schema.py | 5 ++ 14 files changed, 228 insertions(+), 20 deletions(-) (limited to 'tests') diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 240f6f9..54553b3 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -790,3 +790,11 @@ class TestDuckDB(Validator): "duckdb": "ALTER TABLE db.t1 RENAME TO t2", }, ) + + def test_timestamps_with_units(self): + self.validate_all( + "SELECT w::TIMESTAMP_S, x::TIMESTAMP_MS, y::TIMESTAMP_US, z::TIMESTAMP_NS", + write={ + "duckdb": "SELECT CAST(w AS TIMESTAMP_S), CAST(x AS TIMESTAMP_MS), CAST(y AS TIMESTAMP), CAST(z AS TIMESTAMP_NS)", + }, + ) diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 14a864b..b9d1d26 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -65,6 +65,9 @@ class TestMySQL(Validator): self.validate_identity( "INSERT INTO x VALUES (1, 'a', 2.0) ON DUPLICATE KEY UPDATE x.id = 1" ) + self.validate_identity( + "CREATE OR REPLACE VIEW my_view AS SELECT column1 AS `boo`, column2 AS `foo` FROM my_table WHERE column3 = 'some_value' UNION SELECT q.* FROM fruits_table, JSON_TABLE(Fruits, '$[*]' COLUMNS(id VARCHAR(255) PATH '$.$id', value VARCHAR(255) PATH '$.value')) AS q", + ) self.validate_all( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 5572ec1..d92eea5 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -234,21 +234,30 @@ MATCH_RECOGNIZE ( def test_json_table(self): self.validate_identity( - "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR NULL ON EMPTY COLUMNS (foo PATH 'bar'))" + "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR NULL ON EMPTY COLUMNS(foo PATH 'bar'))" ) self.validate_identity( "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR NULL ON EMPTY COLUMNS foo PATH 'bar')", - "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR NULL ON EMPTY COLUMNS (foo PATH 'bar'))", + "SELECT * FROM JSON_TABLE(foo FORMAT JSON, 'bla' ERROR ON ERROR NULL ON EMPTY COLUMNS(foo PATH 'bar'))", ) self.validate_identity( """SELECT CASE WHEN DBMS_LOB.GETLENGTH(info) < 32000 THEN DBMS_LOB.SUBSTR(info) END AS info_txt, info AS info_clob FROM schemaname.tablename ar -INNER JOIN JSON_TABLE(:emps, '$[*]' COLUMNS (empno NUMBER PATH '$')) jt +INNER JOIN JSON_TABLE(:emps, '$[*]' COLUMNS(empno NUMBER PATH '$')) jt ON ar.empno = jt.empno""", pretty=True, ) + self.validate_identity( + """SELECT + * +FROM JSON_TABLE(res, '$.info[*]' COLUMNS( + tempid NUMBER PATH '$.tempid', + NESTED PATH '$.calid[*]' COLUMNS(last_dt PATH '$.last_dt ') +)) src""", + pretty=True, + ) def test_connect_by(self): start = "START WITH last_name = 'King'" diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index 9f2761f..f182feb 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -6,6 +6,18 @@ class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): + self.validate_all( + "SELECT APPROXIMATE COUNT(DISTINCT y)", + read={ + "spark": "SELECT APPROX_COUNT_DISTINCT(y)", + }, + write={ + "redshift": "SELECT APPROXIMATE COUNT(DISTINCT y)", + "spark": "SELECT APPROX_COUNT_DISTINCT(y)", + }, + ) + self.validate_identity("SELECT APPROXIMATE AS y") + self.validate_identity( "SELECT 'a''b'", "SELECT 'a\\'b'", diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 0148e55..9bb9d79 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -361,7 +361,18 @@ TBLPROPERTIES ( "SELECT CAST(123456 AS VARCHAR(3))", write={ "": "SELECT TRY_CAST(123456 AS TEXT)", + "databricks": "SELECT TRY_CAST(123456 AS STRING)", "spark": "SELECT CAST(123456 AS STRING)", + "spark2": "SELECT CAST(123456 AS STRING)", + }, + ) + self.validate_all( + "SELECT TRY_CAST('a' AS INT)", + write={ + "": "SELECT TRY_CAST('a' AS INT)", + "databricks": "SELECT TRY_CAST('a' AS INT)", + "spark": "SELECT TRY_CAST('a' AS INT)", + "spark2": "SELECT CAST('a' AS INT)", }, ) self.validate_all( diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index f3615ff..9dbac8c 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -48,6 +48,14 @@ class TestTeradata(Validator): self.validate_identity("HELP STATISTICS personnel.employee FROM my_qcd") def test_create(self): + self.validate_identity( + "REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b", + "CREATE OR REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b", + ) + self.validate_identity( + "REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b", + "CREATE OR REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b", + ) self.validate_identity("CREATE TABLE x (y INT) PRIMARY INDEX (y) PARTITION BY y INDEX (y)") self.validate_identity("CREATE TABLE x (y INT) PARTITION BY y INDEX (y)") self.validate_identity( diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index fbd913d..f9a720a 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -970,19 +970,19 @@ WHERE self.validate_all( "TRY_CONVERT(NVARCHAR, x, 121)", write={ - "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(30))", + "spark": "TRY_CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(30))", }, ) self.validate_all( "TRY_CONVERT(INT, x)", write={ - "spark": "CAST(x AS INT)", + "spark": "TRY_CAST(x AS INT)", }, ) self.validate_all( "TRY_CAST(x AS INT)", write={ - "spark": "CAST(x AS INT)", + "spark": "TRY_CAST(x AS INT)", }, ) self.validate_all( diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 4cc62c9..a812bc5 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -15,6 +15,35 @@ SELECT "q"."x" AS "x" FROM UNNEST(ARRAY(1, 2)) AS "q"("x", "y"); +# title: explode_outer +# dialect: spark +# execute: false +CREATE OR REPLACE TEMPORARY VIEW latest_boo AS +SELECT + TRIM(split(points, ':')[0]) as points_type, + TRIM(split(points, ':')[1]) as points_value +FROM ( + SELECT + explode_outer(split(object_pointsText, ',')) as points + FROM ( + SELECT + object_pointstext, + FROM boo + ) + WHERE object_pointstext IS NOT NULL + ); +CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS +SELECT + TRIM(SPLIT(`_q_1`.`points`, ':')[0]) AS `points_type`, + TRIM(SPLIT(`_q_1`.`points`, ':')[1]) AS `points_value` +FROM ( + SELECT + EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points` + FROM `boo` AS `boo` + WHERE + NOT `boo`.`object_pointstext` IS NULL +) AS `_q_1`; + # title: Union in CTE WITH cte AS ( ( diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql index 70fd9b0..8497c5b 100644 --- a/tests/fixtures/optimizer/pushdown_projections.sql +++ b/tests/fixtures/optimizer/pushdown_projections.sql @@ -70,6 +70,15 @@ WITH cte AS (SELECT 1 AS x, 3 AS z) SELECT cte.a AS a, cte.z AS z FROM cte AS ct WITH cte(x, y, z) AS (SELECT 1, 2, 3) SELECT a, z FROM (SELECT * FROM cte AS cte(b)) AS cte(a); WITH cte AS (SELECT 1 AS x, 3 AS z) SELECT cte.a AS a, cte.z AS z FROM (SELECT cte.b AS a, cte.z AS z FROM cte AS cte(b)) AS cte; +WITH y AS (SELECT a FROM x) SELECT 1 FROM y; +WITH y AS (SELECT 1 AS _ FROM x AS x) SELECT 1 AS "1" FROM y; + +WITH y AS (SELECT SUM(a) FROM x) SELECT 1 FROM y; +WITH y AS (SELECT MAX(1) AS _ FROM x AS x) SELECT 1 AS "1" FROM y; + +WITH y AS (SELECT a FROM x GROUP BY a) SELECT 1 FROM y; +WITH y AS (SELECT 1 AS _ FROM x AS x GROUP BY x.a) SELECT 1 AS "1" FROM y; + -------------------------------------- -- Unknown Star Expansion -------------------------------------- diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql index a9ae192..e54170c 100644 --- a/tests/fixtures/optimizer/simplify.sql +++ b/tests/fixtures/optimizer/simplify.sql @@ -625,7 +625,7 @@ t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y; t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y; -------------------------------------- --- Coalesce +-- COALESCE -------------------------------------- COALESCE(x); x; @@ -669,18 +669,45 @@ a AND b AND (ROW() OVER () = 1 OR ROW() OVER () IS NULL); CONCAT(x, y); CONCAT(x, y); +CONCAT_WS(sep, x, y); +CONCAT_WS(sep, x, y); + CONCAT(x); x; CONCAT('a', 'b', 'c'); 'abc'; +CONCAT('a', NULL); +CONCAT('a', NULL); + +CONCAT_WS('-', 'a', 'b', 'c'); +'a-b-c'; + CONCAT('a', x, y, 'b', 'c'); CONCAT('a', x, y, 'bc'); +CONCAT_WS('-', 'a', x, y, 'b', 'c'); +CONCAT_WS('-', 'a', x, y, 'b-c'); + 'a' || 'b'; 'ab'; +CONCAT_WS('-', 'a'); +'a'; + +CONCAT_WS('-', x, y); +CONCAT_WS('-', x, y); + +CONCAT_WS('', x, y); +CONCAT_WS('', x, y); + +CONCAT_WS('-', x); +CONCAT_WS('-', x); + +CONCAT_WS(sep, 'a', 'b'); +CONCAT_WS(sep, 'a', 'b'); + 'a' || 'b' || x; CONCAT('ab', x); @@ -837,3 +864,60 @@ x < CAST('2020-01-07' AS DATE); x - INTERVAL '1' day = CAST(y AS DATE); x - INTERVAL '1' day = CAST(y AS DATE); + +-------------------------------------- +-- Constant Propagation +-------------------------------------- +x = 5 AND y = x; +x = 5 AND y = 5; + +5 = x AND y = x; +y = 5 AND 5 = x; + +x = 5 OR y = x; +x = 5 OR y = x; + +(x = 5 AND y = x) OR y = 1; +(x = 5 AND y = 5) OR y = 1; + +t.x = 5 AND y = x; +t.x = 5 AND y = x; + +t.x = 'a' AND y = CONCAT_WS('-', t.x, 'b'); +t.x = 'a' AND y = 'a-b'; + +x = 5 AND y = x AND y + 1 < 5; +FALSE; + +x = 5 AND x = 6; +FALSE; + +x = 5 AND (y = x OR z = 1); +x = 5 AND (y = x OR z = 1); + +x = 5 AND x + 3 = 8; +x = 5; + +x = 5 AND (SELECT x FROM t WHERE y = 1); +x = 5 AND (SELECT x FROM t WHERE y = 1); + +x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1); +x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1); + +x = 1 AND x = y AND (SELECT z FROM t WHERE a AND (b OR c)); +x = 1 AND (SELECT z FROM t WHERE a AND (b OR c)) AND 1 = y; + +t1.a = 39 AND t2.b = t1.a AND t3.c = t2.b; +t1.a = 39 AND t2.b = 39 AND t3.c = 39; + +x = 1 AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; +x = 1 AND CASE WHEN FALSE THEN FALSE ELSE TRUE END; + +x = 1 AND IF(x = 5, FALSE, TRUE); +x = 1 AND CASE WHEN FALSE THEN FALSE ELSE TRUE END; + +x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; +x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; + +x = 1 AND CASE WHEN y = 5 THEN x = z END; +x = 1 AND CASE WHEN y = 5 THEN 1 = z END; diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 2218182..91b553e 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -2029,18 +2029,33 @@ JOIN "date_dim" AS "date_dim" ON "date_dim"."d_year" = 2001 AND "store_sales"."ss_sold_date_sk" = "date_dim"."d_date_sk" JOIN "household_demographics" AS "household_demographics" - ON "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" - AND "customer_demographics"."cd_education_status" = 'Advanced Degree' - AND "customer_demographics"."cd_education_status" = 'Primary' - AND "customer_demographics"."cd_education_status" = 'Secondary' - AND "customer_demographics"."cd_marital_status" = 'D' - AND "customer_demographics"."cd_marital_status" = 'M' - AND "customer_demographics"."cd_marital_status" = 'U' - AND "household_demographics"."hd_dep_count" = 1 - AND "household_demographics"."hd_dep_count" = 3 - AND "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" - AND "store_sales"."ss_sales_price" <= 100.00 - AND "store_sales"."ss_sales_price" >= 150.00 + ON ( + "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" + AND "customer_demographics"."cd_education_status" = 'Advanced Degree' + AND "customer_demographics"."cd_marital_status" = 'U' + AND "household_demographics"."hd_dep_count" = 3 + AND "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + AND "store_sales"."ss_sales_price" <= 150.00 + AND "store_sales"."ss_sales_price" >= 100.00 + ) + OR ( + "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" + AND "customer_demographics"."cd_education_status" = 'Primary' + AND "customer_demographics"."cd_marital_status" = 'M' + AND "household_demographics"."hd_dep_count" = 1 + AND "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + AND "store_sales"."ss_sales_price" <= 100.00 + AND "store_sales"."ss_sales_price" >= 50.00 + ) + OR ( + "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" + AND "customer_demographics"."cd_education_status" = 'Secondary' + AND "customer_demographics"."cd_marital_status" = 'D' + AND "household_demographics"."hd_dep_count" = 1 + AND "store_sales"."ss_hdemo_sk" = "household_demographics"."hd_demo_sk" + AND "store_sales"."ss_sales_price" <= 200.00 + AND "store_sales"."ss_sales_price" >= 150.00 + ) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk"; diff --git a/tests/test_executor.py b/tests/test_executor.py index c6b85c9..721550e 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -4,6 +4,7 @@ from datetime import date from multiprocessing import Pool import duckdb +import numpy as np import pandas as pd from pandas.testing import assert_frame_equal @@ -94,6 +95,11 @@ class TestExecutor(unittest.TestCase): sql, _ = self.sqls[i] a = self.cached_execute(sql) b = pd.DataFrame(table.rows, columns=table.columns) + + # The executor represents NULL values as None, whereas DuckDB represents them as NaN, + # and so the following is done to silence Pandas' "Mismatched null-like values" warnings + b = b.fillna(value=np.nan) + assert_frame_equal(a, b, check_dtype=False, check_index_type=False) def test_execute_callable(self): diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 8fc3273..c43a84e 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -45,6 +45,10 @@ def normalize(expression, **kwargs): return optimizer.simplify.simplify(expression) +def simplify(expression, **kwargs): + return optimizer.simplify.simplify(expression, constant_propagation=True, **kwargs) + + class TestOptimizer(unittest.TestCase): maxDiff = None @@ -271,7 +275,7 @@ class TestOptimizer(unittest.TestCase): self.check_file("pushdown_projections", pushdown_projections, schema=self.schema) def test_simplify(self): - self.check_file("simplify", optimizer.simplify.simplify) + self.check_file("simplify", simplify) expression = parse_one("TRUE AND TRUE AND TRUE") self.assertEqual(exp.true(), optimizer.simplify.simplify(expression)) @@ -823,6 +827,11 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') self.assertEqual(exp.DataType.Type.ARRAY, expression.selects[0].type.this) self.assertEqual(expression.selects[0].type.sql(), "ARRAY") + schema = MappingSchema({"t": {"c": "STRUCT<`f` STRING>"}}, dialect="bigquery") + expression = annotate_types(parse_one("SELECT t.c FROM t"), schema=schema) + + self.assertEqual(expression.selects[0].type.sql(dialect="bigquery"), "STRUCT<`f` STRING>") + def test_type_annotation_cache(self): sql = "SELECT 1 + 1" expression = annotate_types(parse_one(sql)) diff --git a/tests/test_schema.py b/tests/test_schema.py index 626fa11..34c507d 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -272,3 +272,8 @@ class TestSchema(unittest.TestCase): str(ctx.exception), "Table z must match the schema's nesting level: 2.", ) + + def test_has_column(self): + schema = MappingSchema({"x": {"c": "int"}}) + self.assertTrue(schema.has_column("x", exp.column("c"))) + self.assertFalse(schema.has_column("x", exp.column("k"))) -- cgit v1.2.3