From d1db1c46fc67b1ff310a4067326e03a0063c8750 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 26 Aug 2024 10:12:56 +0200 Subject: Merging upstream version 25.16.1. Signed-off-by: Daniel Baumann --- tests/dialects/test_dialect.py | 138 ++++++++++++++++++++++++++++++++++------- 1 file changed, 117 insertions(+), 21 deletions(-) (limited to 'tests/dialects/test_dialect.py') diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 522c42c..8f3c183 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -160,7 +160,7 @@ class TestDialect(Validator): "CAST(a AS TEXT)", write={ "bigquery": "CAST(a AS STRING)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS TEXT)", @@ -181,7 +181,7 @@ class TestDialect(Validator): "CAST(a AS BINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS BINARY(4))", + "clickhouse": "CAST(a AS Nullable(BINARY(4)))", "drill": "CAST(a AS VARBINARY(4))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", @@ -201,7 +201,7 @@ class TestDialect(Validator): "CAST(a AS VARBINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", "mysql": "CAST(a AS VARBINARY(4))", @@ -219,19 +219,19 @@ class TestDialect(Validator): self.validate_all( "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))", write={ - "clickhouse": "CAST(map('a', '1') AS Map(String, String))", + "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))", }, ) self.validate_all( "CAST(ARRAY(1, 2) AS ARRAY)", write={ - "clickhouse": "CAST([1, 2] AS Array(Int8))", + "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))", }, ) self.validate_all( - "CAST((1, 2) AS STRUCT)", + "CAST((1, 2, 3, 4) AS STRUCT)", write={ - "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))", + "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))", }, ) self.validate_all( @@ -327,20 +327,10 @@ class TestDialect(Validator): "postgres": "CAST(a AS DOUBLE PRECISION)", "redshift": "CAST(a AS DOUBLE PRECISION)", }, - write={ - "duckdb": "CAST(a AS DOUBLE)", - "drill": "CAST(a AS DOUBLE)", - "postgres": "CAST(a AS DOUBLE PRECISION)", - "redshift": "CAST(a AS DOUBLE PRECISION)", - "doris": "CAST(a AS DOUBLE)", - }, - ) - - self.validate_all( - "CAST(a AS DOUBLE)", write={ "bigquery": "CAST(a AS FLOAT64)", - "clickhouse": "CAST(a AS Float64)", + "clickhouse": "CAST(a AS Nullable(Float64))", + "doris": "CAST(a AS DOUBLE)", "drill": "CAST(a AS DOUBLE)", "duckdb": "CAST(a AS DOUBLE)", "materialize": "CAST(a AS DOUBLE PRECISION)", @@ -592,7 +582,7 @@ class TestDialect(Validator): "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%T')", "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')", - "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", + "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, ) @@ -647,14 +637,66 @@ class TestDialect(Validator): self.validate_all( "TIME_STR_TO_TIME('2020-01-01')", write={ - "drill": "CAST('2020-01-01' AS TIMESTAMP)", + "bigquery": "CAST('2020-01-01' AS DATETIME)", + "databricks": "CAST('2020-01-01' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", + "tsql": "CAST('2020-01-01' AS DATETIME2)", + "mysql": "CAST('2020-01-01' AS DATETIME)", + "postgres": "CAST('2020-01-01' AS TIMESTAMP)", + "redshift": "CAST('2020-01-01' AS TIMESTAMP)", + "snowflake": "CAST('2020-01-01' AS TIMESTAMP)", + "spark": "CAST('2020-01-01' AS TIMESTAMP)", + "trino": "CAST('2020-01-01' AS TIMESTAMP)", + "clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))", + "drill": "CAST('2020-01-01' AS TIMESTAMP)", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "presto": "CAST('2020-01-01' AS TIMESTAMP)", "sqlite": "'2020-01-01'", "doris": "CAST('2020-01-01' AS DATETIME)", }, ) + self.validate_all( + "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')", + write={ + "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "'2020-01-01 12:13:14-08:00'", + "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + }, + ) + self.validate_all( + "TIME_STR_TO_TIME(col, 'America/Los_Angeles')", + write={ + "bigquery": "CAST(col AS TIMESTAMP)", + "databricks": "CAST(col AS TIMESTAMP)", + "duckdb": "CAST(col AS TIMESTAMPTZ)", + "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST(col AS DATETIME)", + "postgres": "CAST(col AS TIMESTAMPTZ)", + "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST(col AS TIMESTAMPTZ)", + "spark": "CAST(col AS TIMESTAMP)", + "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST(col AS TIMESTAMP)", + "hive": "CAST(col AS TIMESTAMP)", + "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "col", + "doris": "CAST(col AS DATETIME)", + }, + ) self.validate_all( "TIME_STR_TO_UNIX('2020-01-01')", write={ @@ -2115,6 +2157,17 @@ SELECT }, ) + # needs to preserve the target alias in then WHEN condition but not in the THEN clause + self.validate_all( + """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a + WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO' + WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""", + write={ + "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + }, + ) + def test_substring(self): self.validate_all( "SUBSTR('123456', 2, 3)", @@ -2603,3 +2656,46 @@ FROM subquery2""", "trino": f"SELECT {pad_func}('bar', 5, ' ')", }, ) + + def test_generate_date_array(self): + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))", + write={ + "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))", + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series", + "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))", + "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates", + }, + ) + self.validate_all( + "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + }, + ) + self.validate_all( + "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)", + "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates", + }, + ) -- cgit v1.2.3