From a1f10f8d39404d9bae42a64efaf505fa12f34c1a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 26 Aug 2024 10:12:52 +0200 Subject: Adding upstream version 25.16.1. Signed-off-by: Daniel Baumann --- tests/dialects/test_athena.py | 13 ++- tests/dialects/test_bigquery.py | 135 +++++++++++++++++++++++++++-- tests/dialects/test_clickhouse.py | 174 +++++++++++++++++++++++++++++++------- tests/dialects/test_databricks.py | 8 ++ tests/dialects/test_dialect.py | 138 +++++++++++++++++++++++++----- tests/dialects/test_duckdb.py | 92 ++++++++++++-------- tests/dialects/test_hive.py | 10 +++ tests/dialects/test_mysql.py | 12 +++ tests/dialects/test_oracle.py | 31 +++++-- tests/dialects/test_postgres.py | 59 +++++++------ tests/dialects/test_presto.py | 7 ++ tests/dialects/test_redshift.py | 20 ++++- tests/dialects/test_snowflake.py | 55 +++++++++++- tests/dialects/test_spark.py | 39 ++++++++- tests/dialects/test_teradata.py | 5 ++ tests/dialects/test_trino.py | 32 +++++++ tests/dialects/test_tsql.py | 29 ++++++- 17 files changed, 722 insertions(+), 137 deletions(-) (limited to 'tests/dialects') diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py index 3288ada..5522976 100644 --- a/tests/dialects/test_athena.py +++ b/tests/dialects/test_athena.py @@ -6,6 +6,15 @@ class TestAthena(Validator): maxDiff = None def test_athena(self): + self.validate_identity( + "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" + ) + self.validate_identity( + "UNLOAD (SELECT name1, address1, comment1, key1 FROM table1) " + "TO 's3://amzn-s3-demo-bucket/ partitioned/' " + "WITH (format = 'TEXTFILE', partitioned_by = ARRAY['key1'])", + check_command_warning=True, + ) self.validate_identity( """USING EXTERNAL FUNCTION some_function(input VARBINARY) RETURNS VARCHAR @@ -14,7 +23,3 @@ class TestAthena(Validator): some_function(1)""", check_command_warning=True, ) - - self.validate_identity( - "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" - ) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 45f3856..8c9f093 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -1,4 +1,6 @@ from unittest import mock +import datetime +import pytz from sqlglot import ( ErrorLevel, @@ -103,6 +105,7 @@ LANGUAGE js AS select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`") self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF") + self.validate_identity("SELECT * FROM READ_CSV('bla.csv')") self.validate_identity("CAST(x AS STRUCT>)") self.validate_identity("assert.true(1 = 1)") self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text") @@ -446,7 +449,7 @@ LANGUAGE js AS write={ "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", - "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))", "mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", @@ -510,6 +513,20 @@ LANGUAGE js AS "duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')", }, ) + self.validate_all( + "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')", + write={ + "bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))", + "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')", + }, + ) + self.validate_all( + "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')", + write={ + "bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')", + "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')", + }, + ) self.validate_all( "SELECT COUNTIF(x)", read={ @@ -636,6 +653,7 @@ LANGUAGE js AS write={ "bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)", "databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')", + "duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))", }, ), ) @@ -1209,10 +1227,9 @@ LANGUAGE js AS "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", write={ "bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", - "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", "presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))", - "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", - "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", + "hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", + "spark": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", }, ) self.validate_all( @@ -1256,6 +1273,13 @@ LANGUAGE js AS "starrocks": "DATE_DIFF('MINUTE', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", }, ) + self.validate_all( + "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", + write={ + "bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", + "duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))", + }, + ) self.validate_all( "CURRENT_DATE('UTC')", write={ @@ -1402,6 +1426,57 @@ WHERE "": "SELECT LENGTH(foo)", }, ) + self.validate_all( + "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", + write={ + "duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))", + "bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", + }, + ) + self.validate_all( + "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", + write={ + "bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", + "duckdb": "ARRAY_CONCAT([1, 2], ARRAY_CONCAT([3, 4], [5, 6]))", + "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))", + "redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))", + "snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))", + "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", + "trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", + }, + ) + self.validate_all( + "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')", + write={ + "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])", + "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)", + }, + ) + self.validate_all( + "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", + write={ + "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])", + "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", + }, + ) + self.validate_all( + "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", + write={ + "duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)", + "bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", + }, + ) + self.validate_all( + "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", + write={ + "bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", + "duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)", + }, + ) def test_errors(self): with self.assertRaises(TokenError): @@ -1794,14 +1869,14 @@ OPTIONS ( "SELECT * FROM UNNEST(ARRAY>[])", write={ "bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY>))", - "duckdb": "SELECT * FROM UNNEST(CAST([] AS STRUCT(x BIGINT)[]))", + "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])", write={ "bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY>))", - "duckdb": "SELECT * FROM UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]))", + "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))", }, ) self.validate_all( @@ -1811,3 +1886,51 @@ OPTIONS ( "duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))", }, ) + + def test_convert(self): + for value, expected in [ + (datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"), + (datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"), + ( + datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc), + "CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)", + ), + ( + pytz.timezone("America/Los_Angeles").localize( + datetime.datetime(2023, 1, 1, 12, 13, 14) + ), + "CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)", + ), + ]: + with self.subTest(value): + self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected) + + def test_unnest(self): + self.validate_all( + "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", + write={ + "bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", + "duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))", + }, + ) + self.validate_all( + "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", + write={ + "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", + "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))", + }, + ) + self.validate_all( + "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + write={ + "bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + "duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", + }, + ) + self.validate_all( + "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + write={ + "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", + }, + ) diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index 2356ad0..53cd0a9 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -1,4 +1,7 @@ +from datetime import date from sqlglot import exp, parse_one +from sqlglot.dialects import ClickHouse +from sqlglot.expressions import convert from tests.dialects.test_dialect import Validator from sqlglot.errors import ErrorLevel @@ -7,27 +10,27 @@ class TestClickhouse(Validator): dialect = "clickhouse" def test_clickhouse(self): - self.validate_identity("SELECT toFloat(like)") - self.validate_identity("SELECT like") + for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING: + self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)") - string_types = [ - "BLOB", - "LONGBLOB", - "LONGTEXT", - "MEDIUMBLOB", - "MEDIUMTEXT", - "TINYBLOB", - "TINYTEXT", - "VARCHAR(255)", - ] + # Arrays, maps and tuples can't be Nullable in ClickHouse + for non_nullable_type in ("ARRAY", "MAP", "STRUCT(a: INT)"): + try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})") + target_type = try_cast.to.sql("clickhouse") + self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})") - for string_type in string_types: - self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)") + for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"): + try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})") + target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse") + self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))") expr = parse_one("count(x)") self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)") self.assertIsNone(expr._meta) + self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var) + self.validate_identity("SELECT toFloat(like)") + self.validate_identity("SELECT like") self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)") self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')") self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))") @@ -40,7 +43,7 @@ class TestClickhouse(Validator): self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)") self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)") self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000") - self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))") + self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))") self.validate_identity("CAST(x AS Enum('hello', 'world'))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))") @@ -79,7 +82,8 @@ class TestClickhouse(Validator): self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)") self.validate_identity("position(haystack, needle)") self.validate_identity("position(haystack, needle, position)") - self.validate_identity("CAST(x AS DATETIME)") + self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)") + self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)") self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)") self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src") self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""") @@ -207,11 +211,16 @@ class TestClickhouse(Validator): }, ) self.validate_all( - "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND", + "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", read={ "duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", "postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", }, + write={ + "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", + "duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND", + "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'", + }, ) self.validate_all( "SELECT CURRENT_DATE()", @@ -471,6 +480,47 @@ class TestClickhouse(Validator): parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType ) + self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)") + self.validate_all( + "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + read={ + # looks like values table function, but should be parsed as VALUES block + "clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)" + }, + write={ + "clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + }, + ) + + def test_clickhouse_values(self): + values = exp.select("*").from_( + exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"]) + ) + self.assertEqual( + values.sql("clickhouse"), + "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq", + ) + + self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)") + self.validate_identity( + "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)", + "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + ) + + self.validate_all( + "SELECT col FROM (SELECT 1 AS col) AS _t", + read={ + "duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)", + }, + ) + self.validate_all( + "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t", + read={ + "duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)", + }, + ) + def test_cte(self): self.validate_identity("WITH 'x' AS foo SELECT foo") self.validate_identity("WITH ['c'] AS field_names SELECT field_names") @@ -531,7 +581,7 @@ class TestClickhouse(Validator): self.validate_all( "SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", write={ - "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", + "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", "": "SELECT :abc, :b, :c, :d, :e", }, ) @@ -562,14 +612,77 @@ class TestClickhouse(Validator): ) def test_ddl(self): + db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None) + create_with_cluster = exp.Create( + this=db_table_expr, + kind="DATABASE", + properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), + ) + self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + + ctas_with_comment = exp.Create( + this=exp.table_("foo"), + kind="TABLE", + expression=exp.select("*").from_("db.other_table"), + properties=exp.Properties( + expressions=[ + exp.EngineProperty(this=exp.var("Memory")), + exp.SchemaCommentProperty(this=exp.Literal.string("foo")), + ], + ), + ) + self.assertEqual( + ctas_with_comment.sql("clickhouse"), + "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'", + ) + + self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") + self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") + self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))") + self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""") + self.validate_identity( + "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" + ) + self.validate_identity( + "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" + ) + self.validate_identity( + """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'""" + ) + self.validate_identity( + "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3" + ) self.validate_identity( 'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)' ) self.validate_identity( "CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'" ) - self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))") + self.validate_identity( + "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'", + "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'", + ) + self.validate_all( + "CREATE DATABASE x", + read={ + "duckdb": "CREATE SCHEMA x", + }, + write={ + "clickhouse": "CREATE DATABASE x", + "duckdb": "CREATE SCHEMA x", + }, + ) + self.validate_all( + "DROP DATABASE x", + read={ + "duckdb": "DROP SCHEMA x", + }, + write={ + "clickhouse": "DROP DATABASE x", + "duckdb": "DROP SCHEMA x", + }, + ) self.validate_all( """ CREATE TABLE example1 ( @@ -583,7 +696,7 @@ class TestClickhouse(Validator): """, write={ "clickhouse": """CREATE TABLE example1 ( - timestamp DATETIME, + timestamp DateTime, x UInt32 TTL now() + INTERVAL '1' MONTH, y String TTL timestamp + INTERVAL '1' DAY, z String @@ -661,7 +774,7 @@ SETTINGS """, write={ "clickhouse": """CREATE TABLE example_table ( - d DATETIME, + d DateTime, a Int32 ) ENGINE=MergeTree @@ -688,7 +801,7 @@ TTL """, write={ "clickhouse": """CREATE TABLE table_with_where ( - d DATETIME, + d DateTime, a Int32 ) ENGINE=MergeTree @@ -716,7 +829,7 @@ WHERE """, write={ "clickhouse": """CREATE TABLE table_for_recompression ( - d DATETIME, + d DateTime, key UInt64, value String ) @@ -748,7 +861,7 @@ SETTINGS """, write={ "clickhouse": """CREATE TABLE table_for_aggregation ( - d DATETIME, + d DateTime, k1 Int32, k2 Int32, x Int32, @@ -855,8 +968,6 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) - self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") - self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") self.validate_all( """ CREATE TABLE t ( @@ -873,12 +984,6 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) - self.validate_identity( - "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" - ) - self.validate_identity( - "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" - ) def test_agg_functions(self): def extract_agg_func(query): @@ -934,3 +1039,8 @@ LIFETIME(MIN 0 MAX 0)""", f"SELECT {func_alias}(SECOND, 1, bar)", f"SELECT {func_name}(SECOND, 1, bar)", ) + + def test_convert(self): + self.assertEqual( + convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')" + ) diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py index 90499fb..a46794b 100644 --- a/tests/dialects/test_databricks.py +++ b/tests/dialects/test_databricks.py @@ -256,3 +256,11 @@ class TestDatabricks(Validator): "databricks": "WITH x AS (SELECT 1) SELECT * FROM x", }, ) + + def test_streaming_tables(self): + self.validate_identity( + "CREATE STREAMING TABLE raw_data AS SELECT * FROM STREAM READ_FILES('abfss://container@storageAccount.dfs.core.windows.net/base/path')" + ) + self.validate_identity( + "CREATE OR REFRESH STREAMING TABLE csv_data (id INT, ts TIMESTAMP, event STRING) AS SELECT * FROM STREAM READ_FILES('s3://bucket/path', format => 'csv', schema => 'id int, ts timestamp, event string')" + ) diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 522c42c..8f3c183 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -160,7 +160,7 @@ class TestDialect(Validator): "CAST(a AS TEXT)", write={ "bigquery": "CAST(a AS STRING)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS TEXT)", @@ -181,7 +181,7 @@ class TestDialect(Validator): "CAST(a AS BINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS BINARY(4))", + "clickhouse": "CAST(a AS Nullable(BINARY(4)))", "drill": "CAST(a AS VARBINARY(4))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", @@ -201,7 +201,7 @@ class TestDialect(Validator): "CAST(a AS VARBINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", "mysql": "CAST(a AS VARBINARY(4))", @@ -219,19 +219,19 @@ class TestDialect(Validator): self.validate_all( "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))", write={ - "clickhouse": "CAST(map('a', '1') AS Map(String, String))", + "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))", }, ) self.validate_all( "CAST(ARRAY(1, 2) AS ARRAY)", write={ - "clickhouse": "CAST([1, 2] AS Array(Int8))", + "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))", }, ) self.validate_all( - "CAST((1, 2) AS STRUCT)", + "CAST((1, 2, 3, 4) AS STRUCT)", write={ - "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))", + "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))", }, ) self.validate_all( @@ -327,20 +327,10 @@ class TestDialect(Validator): "postgres": "CAST(a AS DOUBLE PRECISION)", "redshift": "CAST(a AS DOUBLE PRECISION)", }, - write={ - "duckdb": "CAST(a AS DOUBLE)", - "drill": "CAST(a AS DOUBLE)", - "postgres": "CAST(a AS DOUBLE PRECISION)", - "redshift": "CAST(a AS DOUBLE PRECISION)", - "doris": "CAST(a AS DOUBLE)", - }, - ) - - self.validate_all( - "CAST(a AS DOUBLE)", write={ "bigquery": "CAST(a AS FLOAT64)", - "clickhouse": "CAST(a AS Float64)", + "clickhouse": "CAST(a AS Nullable(Float64))", + "doris": "CAST(a AS DOUBLE)", "drill": "CAST(a AS DOUBLE)", "duckdb": "CAST(a AS DOUBLE)", "materialize": "CAST(a AS DOUBLE PRECISION)", @@ -592,7 +582,7 @@ class TestDialect(Validator): "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%T')", "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')", - "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", + "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, ) @@ -647,14 +637,66 @@ class TestDialect(Validator): self.validate_all( "TIME_STR_TO_TIME('2020-01-01')", write={ - "drill": "CAST('2020-01-01' AS TIMESTAMP)", + "bigquery": "CAST('2020-01-01' AS DATETIME)", + "databricks": "CAST('2020-01-01' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", + "tsql": "CAST('2020-01-01' AS DATETIME2)", + "mysql": "CAST('2020-01-01' AS DATETIME)", + "postgres": "CAST('2020-01-01' AS TIMESTAMP)", + "redshift": "CAST('2020-01-01' AS TIMESTAMP)", + "snowflake": "CAST('2020-01-01' AS TIMESTAMP)", + "spark": "CAST('2020-01-01' AS TIMESTAMP)", + "trino": "CAST('2020-01-01' AS TIMESTAMP)", + "clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))", + "drill": "CAST('2020-01-01' AS TIMESTAMP)", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "presto": "CAST('2020-01-01' AS TIMESTAMP)", "sqlite": "'2020-01-01'", "doris": "CAST('2020-01-01' AS DATETIME)", }, ) + self.validate_all( + "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')", + write={ + "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "'2020-01-01 12:13:14-08:00'", + "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + }, + ) + self.validate_all( + "TIME_STR_TO_TIME(col, 'America/Los_Angeles')", + write={ + "bigquery": "CAST(col AS TIMESTAMP)", + "databricks": "CAST(col AS TIMESTAMP)", + "duckdb": "CAST(col AS TIMESTAMPTZ)", + "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST(col AS DATETIME)", + "postgres": "CAST(col AS TIMESTAMPTZ)", + "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST(col AS TIMESTAMPTZ)", + "spark": "CAST(col AS TIMESTAMP)", + "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST(col AS TIMESTAMP)", + "hive": "CAST(col AS TIMESTAMP)", + "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "col", + "doris": "CAST(col AS DATETIME)", + }, + ) self.validate_all( "TIME_STR_TO_UNIX('2020-01-01')", write={ @@ -2115,6 +2157,17 @@ SELECT }, ) + # needs to preserve the target alias in then WHEN condition but not in the THEN clause + self.validate_all( + """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a + WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO' + WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""", + write={ + "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + }, + ) + def test_substring(self): self.validate_all( "SUBSTR('123456', 2, 3)", @@ -2603,3 +2656,46 @@ FROM subquery2""", "trino": f"SELECT {pad_func}('bar', 5, ' ')", }, ) + + def test_generate_date_array(self): + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))", + write={ + "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))", + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series", + "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))", + "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates", + }, + ) + self.validate_all( + "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + }, + ) + self.validate_all( + "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)", + "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates", + }, + ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 15af086..3fb9930 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -8,8 +8,6 @@ class TestDuckDB(Validator): dialect = "duckdb" def test_duckdb(self): - self.validate_identity("x::int[3]", "CAST(x AS INT[3])") - with self.assertRaises(ParseError): parse_one("1 //", read="duckdb") @@ -34,31 +32,6 @@ class TestDuckDB(Validator): "mysql": "SELECT `straight_join`", }, ) - self.validate_all( - "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", - read={ - "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", - "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)", - }, - ) - self.validate_all( - "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", - read={ - "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", - "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", - }, - ) - self.validate_all( - "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", - read={ - "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", - "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", - }, - write={ - "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", - "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", - }, - ) self.validate_all( 'STRUCT_PACK("a b" := 1)', write={ @@ -112,7 +85,9 @@ class TestDuckDB(Validator): self.validate_all( "CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)", - read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"}, + read={ + "bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)", + }, ) self.validate_identity("SELECT 1 WHERE x > $1") self.validate_identity("SELECT 1 WHERE x > $name") @@ -128,13 +103,17 @@ class TestDuckDB(Validator): ) self.validate_all( - "{'a': 1, 'b': '2'}", write={"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))"} + "{'a': 1, 'b': '2'}", + write={ + "presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))", + }, ) self.validate_all( "struct_pack(a := 1, b := 2)", - write={"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))"}, + write={ + "presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))", + }, ) - self.validate_all( "struct_pack(a := 1, b := x)", write={ @@ -818,6 +797,9 @@ class TestDuckDB(Validator): ) self.validate_identity("SELECT LENGTH(foo)") + self.validate_identity("SELECT ARRAY[1, 2, 3]", "SELECT [1, 2, 3]") + + self.validate_identity("SELECT * FROM (DESCRIBE t)") def test_array_index(self): with self.assertLogs(helper_logger) as cm: @@ -909,12 +891,12 @@ class TestDuckDB(Validator): "EPOCH_MS(x)", write={ "bigquery": "TIMESTAMP_MILLIS(x)", + "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))", "duckdb": "EPOCH_MS(x)", + "mysql": "FROM_UNIXTIME(x / POWER(10, 3))", + "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)", "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))", "spark": "TIMESTAMP_MILLIS(x)", - "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Int64))", - "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)", - "mysql": "FROM_UNIXTIME(x / POWER(10, 3))", }, ) self.validate_all( @@ -958,7 +940,7 @@ class TestDuckDB(Validator): self.validate_all( "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')", write={ - "bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)", + "bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)", "duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')", "presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)", @@ -1023,6 +1005,7 @@ class TestDuckDB(Validator): self.validate_identity("ARRAY((SELECT id FROM t))") def test_cast(self): + self.validate_identity("x::int[3]", "CAST(x AS INT[3])") self.validate_identity("CAST(x AS REAL)") self.validate_identity("CAST(x AS UINTEGER)") self.validate_identity("CAST(x AS UBIGINT)") @@ -1076,6 +1059,39 @@ class TestDuckDB(Validator): "STRUCT_PACK(a := 'b')::STRUCT(a TEXT)", "CAST(ROW('b') AS STRUCT(a TEXT))", ) + + self.validate_all( + "CAST(x AS TIME)", + read={ + "duckdb": "CAST(x AS TIME)", + "presto": "CAST(x AS TIME(6))", + }, + ) + self.validate_all( + "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", + read={ + "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", + "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)", + }, + ) + self.validate_all( + "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + read={ + "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", + }, + ) + self.validate_all( + "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", + read={ + "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", + "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", + }, + write={ + "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", + "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", + }, + ) self.validate_all( "CAST(x AS VARCHAR(5))", write={ @@ -1156,6 +1172,12 @@ class TestDuckDB(Validator): }, ) + self.validate_identity("SELECT x::INT[3][3]", "SELECT CAST(x AS INT[3][3])") + self.validate_identity( + """SELECT ARRAY[[[1]]]::INT[1][1][1]""", + """SELECT CAST([[[1]]] AS INT[1][1][1])""", + ) + def test_encode_decode(self): self.validate_all( "ENCODE(x)", diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 65ea0e5..c2768bf 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -171,6 +171,16 @@ class TestHive(Validator): self.validate_identity( """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')""" ) + self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2") + self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2") + self.validate_identity( + "ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1" + ) + self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2") + self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')") + self.validate_identity( + "ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True + ) def test_lateral_view(self): self.validate_all( diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 65b4c4a..c598cb5 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -24,6 +24,18 @@ class TestMySQL(Validator): self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)") self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)") self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT") + self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo") + self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100") + self.validate_identity( + "ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True + ) + self.validate_identity( + "ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo", + check_command_warning=True, + ) + self.validate_identity( + "ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True + ) self.validate_identity( "INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b" ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 1d9fd99..4813a49 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -6,6 +6,7 @@ class TestOracle(Validator): dialect = "oracle" def test_oracle(self): + self.validate_identity("1 /* /* */") self.validate_all( "SELECT CONNECT_BY_ROOT x y", write={ @@ -13,8 +14,9 @@ class TestOracle(Validator): "oracle": "SELECT CONNECT_BY_ROOT x AS y", }, ) - self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.AlterTable) + self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.Alter) + self.validate_identity("SYSDATE") self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("REGEXP_REPLACE('source', 'search')") @@ -43,6 +45,9 @@ class TestOracle(Validator): self.validate_identity("SELECT COUNT(*) * 10 FROM orders SAMPLE (10) SEED (1)") self.validate_identity("SELECT * FROM V$SESSION") self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')") + self.validate_identity( + "SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))" + ) self.validate_identity( "SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name" ) @@ -72,10 +77,6 @@ class TestOracle(Validator): "SELECT JSON_OBJECTAGG(KEY department_name VALUE department_id) FROM dep WHERE id <= 30", "SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30", ) - self.validate_identity( - "SYSDATE", - "CURRENT_TIMESTAMP", - ) self.validate_identity( "SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) " 'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) ' @@ -88,7 +89,6 @@ class TestOracle(Validator): self.validate_identity( "SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY", ) - self.validate_identity("NVL(x, y)").assert_is(exp.Anonymous) self.validate_identity( "SELECT * FROM t SAMPLE (.25)", "SELECT * FROM t SAMPLE (0.25)", @@ -98,6 +98,16 @@ class TestOracle(Validator): "SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2" ) + self.validate_all( + "SELECT * FROM test WHERE MOD(col1, 4) = 3", + read={ + "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", + }, + write={ + "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", + "oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3", + }, + ) self.validate_all( "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", read={ @@ -242,6 +252,15 @@ class TestOracle(Validator): """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""", ) + self.validate_all( + "NVL(NULL, 1)", + write={ + "oracle": "NVL(NULL, 1)", + "": "COALESCE(NULL, 1)", + "clickhouse": "COALESCE(NULL, 1)", + }, + ) + def test_join_marker(self): self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 5df5e31..075c274 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -28,7 +28,7 @@ class TestPostgres(Validator): alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION""" expr = self.parse_one(alter_table_only) - self.assertIsInstance(expr, exp.AlterTable) + self.assertIsInstance(expr, exp.Alter) self.assertEqual(expr.sql(dialect="postgres"), alter_table_only) self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')") @@ -548,47 +548,54 @@ class TestPostgres(Validator): "postgres": "x # y", }, ) + self.validate_all( + "SELECT GENERATE_SERIES(1, 5)", + write={ + "bigquery": UnsupportedError, + "postgres": "SELECT GENERATE_SERIES(1, 5)", + }, + ) + self.validate_all( + "WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + write={ + "duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + "postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + }, + ) self.validate_all( "GENERATE_SERIES(a, b, ' 2 days ')", write={ "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')", - "presto": "SEQUENCE(a, b, INTERVAL '2' DAY)", - "trino": "SEQUENCE(a, b, INTERVAL '2' DAY)", + "presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", + "trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", }, ) self.validate_all( "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')", write={ + "databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')", - "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", - "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", - "hive": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "spark2": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "spark": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "databricks": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", + "presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", + "spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", }, ) self.validate_all( - "GENERATE_SERIES(a, b)", + "SELECT * FROM GENERATE_SERIES(a, b)", read={ - "postgres": "GENERATE_SERIES(a, b)", - "presto": "SEQUENCE(a, b)", - "trino": "SEQUENCE(a, b)", - "tsql": "GENERATE_SERIES(a, b)", - "hive": "SEQUENCE(a, b)", - "spark2": "SEQUENCE(a, b)", - "spark": "SEQUENCE(a, b)", - "databricks": "SEQUENCE(a, b)", + "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, write={ - "postgres": "GENERATE_SERIES(a, b)", - "presto": "SEQUENCE(a, b)", - "trino": "SEQUENCE(a, b)", - "tsql": "GENERATE_SERIES(a, b)", - "hive": "SEQUENCE(a, b)", - "spark2": "SEQUENCE(a, b)", - "spark": "SEQUENCE(a, b)", - "databricks": "SEQUENCE(a, b)", + "databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "postgres": "SELECT * FROM GENERATE_SERIES(a, b)", + "presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))", + "spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))", + "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, ) self.validate_all( diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index f89b8e8..950c89f 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -13,6 +13,13 @@ class TestPresto(Validator): self.validate_identity("CAST(TDIGEST_AGG(1) AS TDIGEST)") self.validate_identity("CAST(x AS HYPERLOGLOG)") + self.validate_all( + "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')", + write={ + "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)", + "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')", + }, + ) self.validate_all( "CAST(x AS INTERVAL YEAR TO MONTH)", write={ diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index c4e7073..c13b61a 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -6,7 +6,6 @@ class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): - self.validate_identity("1 div", "1 AS div") self.validate_all( "SELECT SPLIT_TO_ARRAY('12,345,6789')", write={ @@ -315,6 +314,7 @@ class TestRedshift(Validator): ) def test_identity(self): + self.validate_identity("1 div", "1 AS div") self.validate_identity("LISTAGG(DISTINCT foo, ', ')") self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") @@ -337,6 +337,10 @@ class TestRedshift(Validator): self.validate_identity( """SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)""" ) + self.validate_identity( + 'DATE_PART(year, "somecol")', + 'EXTRACT(year FROM "somecol")', + ).this.assert_is(exp.Var) self.validate_identity( "SELECT CONCAT('abc', 'def')", "SELECT 'abc' || 'def'", @@ -430,6 +434,14 @@ ORDER BY ) self.validate_identity("SELECT JSON_PARSE('[]')") + self.validate_identity("SELECT ARRAY(1, 2, 3)") + self.validate_identity("SELECT ARRAY[1, 2, 3]") + + self.validate_identity( + """SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""", + """SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""", + ) + def test_values(self): # Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError values = [str(v) for v in range(0, 10000)] @@ -608,3 +620,9 @@ FROM ( "select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)", "SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)", ) + + def test_time(self): + self.validate_all( + "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", + write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"}, + ) diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 3686de5..d01101e 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -11,6 +11,7 @@ class TestSnowflake(Validator): dialect = "snowflake" def test_snowflake(self): + self.validate_identity("1 /* /* */") self.validate_identity( "SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table" ) @@ -59,6 +60,7 @@ WHERE )""", ) + self.validate_identity("exclude := [foo]") self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))") self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias") self.validate_identity("SELECT number").selects[0].assert_is(exp.Column) @@ -113,6 +115,18 @@ WHERE self.validate_identity("ALTER TABLE a SWAP WITH b") self.validate_identity("SELECT MATCH_CONDITION") self.validate_identity("SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t") + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY ORDER BY quarter)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY)) ORDER BY empid" + ) self.validate_identity( "MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)" ) @@ -125,6 +139,18 @@ WHERE self.validate_identity( "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID" ) + self.validate_identity( + "SELECT * FROM s WHERE c NOT IN (1, 2, 3)", + "SELECT * FROM s WHERE NOT c IN (1, 2, 3)", + ) + self.validate_identity( + "SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)", + "SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)", + ) + self.validate_identity( + "SELECT * FROM t1 INNER JOIN t2 USING (t1.col)", + "SELECT * FROM t1 INNER JOIN t2 USING (col)", + ) self.validate_identity( "CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)", "CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)", @@ -847,6 +873,33 @@ WHERE }, ) + self.validate_identity( + """SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""", + """SELECT CAST(['foo'] AS VARIANT)[0]""", + ) + + self.validate_all( + "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + write={ + "snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + }, + ) + + self.validate_all( + "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + write={ + "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')", + "duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'", + }, + ) + def test_null_treatment(self): self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", @@ -1921,7 +1974,7 @@ STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""", def test_swap(self): ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake") - assert isinstance(ast, exp.AlterTable) + assert isinstance(ast, exp.Alter) assert isinstance(ast.args["actions"][0], exp.SwapTable) def test_try_cast(self): diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index fc3146f..a2d87df 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -129,6 +129,16 @@ TBLPROPERTIES ( "spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)", }, ) + self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo") + self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo") + self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed") + self.validate_identity( + "ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')" + ) + self.validate_identity( + "ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')", + check_command_warning=True, + ) def test_to_date(self): self.validate_all( @@ -297,6 +307,13 @@ TBLPROPERTIES ( }, ) + self.validate_all( + "SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday", + write={ + "presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday", + "spark": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), 'EEEE') AS weekday", + }, + ) self.validate_all( "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", read={ @@ -557,7 +574,10 @@ TBLPROPERTIES ( ) self.validate_all( - "CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"} + "CAST(x AS TIMESTAMP)", + read={ + "trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)", + }, ) self.validate_all( "SELECT DATE_ADD(my_date_column, 1)", @@ -688,6 +708,7 @@ TBLPROPERTIES ( "trino": "SELECT DATE_ADD('MONTH', 20, col)", }, ) + self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')") def test_bool_or(self): self.validate_all( @@ -805,8 +826,22 @@ TBLPROPERTIES ( self.assertEqual(query.sql(name), without_modifiers) def test_schema_binding_options(self): - for schema_binding in ("BINDING", "COMPENSATION", "TYPE EVOLUTION", "EVOLUTION"): + for schema_binding in ( + "BINDING", + "COMPENSATION", + "TYPE EVOLUTION", + "EVOLUTION", + ): with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"): self.validate_identity( f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp" ) + + def test_minus(self): + self.validate_all( + "SELECT * FROM db.table1 MINUS SELECT * FROM db.table2", + write={ + "spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", + "databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", + }, + ) diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index 3945ca3..598cb53 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -1,3 +1,4 @@ +from sqlglot import exp from tests.dialects.test_dialect import Validator @@ -31,6 +32,10 @@ class TestTeradata(Validator): }, ) + self.validate_identity( + "RENAME TABLE emp TO employee", check_command_warning=True + ).assert_is(exp.Command) + def test_translate(self): self.validate_all( "TRANSLATE(x USING LATIN_TO_UNICODE)", diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py index ccc1407..0ebe749 100644 --- a/tests/dialects/test_trino.py +++ b/tests/dialects/test_trino.py @@ -16,3 +16,35 @@ class TestTrino(Validator): "SELECT TRIM('!foo!', '!')", "SELECT TRIM('!' FROM '!foo!')", ) + + def test_ddl(self): + self.validate_identity("ALTER TABLE users RENAME TO people") + self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people") + self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR") + self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR") + self.validate_identity("ALTER TABLE users DROP COLUMN zip") + self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip") + self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id") + self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id") + self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT") + self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL") + self.validate_identity( + "ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456", + check_command_warning=True, + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES x = DEFAULT", check_command_warning=True + ) + self.validate_identity("ALTER VIEW people RENAME TO users") + self.validate_identity( + "ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True + ) diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index cfdcb8d..ad18981 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -792,7 +792,7 @@ class TestTSQL(Validator): self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x") self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is( - exp.AlterTable + exp.Alter ).args["actions"][0].assert_is(exp.Drop) for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"): @@ -822,6 +822,20 @@ class TestTSQL(Validator): self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=ON") self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=OFF") + self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo") + self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100") + self.validate_identity( + "ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) + self.validate_identity( + "ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) + self.validate_identity( + "ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) self.validate_identity( "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7 END", "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7 END", @@ -1513,6 +1527,15 @@ WHERE }, ) + # Check superfluous casts arent added. ref: https://github.com/TobikoData/sqlmesh/issues/2672 + self.validate_all( + "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo", + write={ + "tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo", + "clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo", + }, + ) + def test_lateral_subquery(self): self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)", @@ -1650,7 +1673,7 @@ WHERE }, write={ "bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", - "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))", "mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))", "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", @@ -1665,7 +1688,7 @@ WHERE "EOMONTH(GETDATE(), -1)", write={ "bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))", - "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)", "mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))", "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", -- cgit v1.2.3