diff options
Diffstat (limited to 'tests')
27 files changed, 925 insertions, 245 deletions
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py index 3288ada..5522976 100644 --- a/tests/dialects/test_athena.py +++ b/tests/dialects/test_athena.py @@ -7,6 +7,15 @@ class TestAthena(Validator): def test_athena(self): self.validate_identity( + "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" + ) + self.validate_identity( + "UNLOAD (SELECT name1, address1, comment1, key1 FROM table1) " + "TO 's3://amzn-s3-demo-bucket/ partitioned/' " + "WITH (format = 'TEXTFILE', partitioned_by = ARRAY['key1'])", + check_command_warning=True, + ) + self.validate_identity( """USING EXTERNAL FUNCTION some_function(input VARBINARY) RETURNS VARCHAR LAMBDA 'some-name' @@ -14,7 +23,3 @@ class TestAthena(Validator): some_function(1)""", check_command_warning=True, ) - - self.validate_identity( - "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" - ) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 45f3856..8c9f093 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -1,4 +1,6 @@ from unittest import mock +import datetime +import pytz from sqlglot import ( ErrorLevel, @@ -103,6 +105,7 @@ LANGUAGE js AS select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`") self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF") + self.validate_identity("SELECT * FROM READ_CSV('bla.csv')") self.validate_identity("CAST(x AS STRUCT<list ARRAY<INT64>>)") self.validate_identity("assert.true(1 = 1)") self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text") @@ -446,7 +449,7 @@ LANGUAGE js AS write={ "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", - "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))", "mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", @@ -511,6 +514,20 @@ LANGUAGE js AS }, ) self.validate_all( + "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')", + write={ + "bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))", + "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')", + }, + ) + self.validate_all( + "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')", + write={ + "bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')", + "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')", + }, + ) + self.validate_all( "SELECT COUNTIF(x)", read={ "clickhouse": "SELECT countIf(x)", @@ -636,6 +653,7 @@ LANGUAGE js AS write={ "bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)", "databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')", + "duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))", }, ), ) @@ -1209,10 +1227,9 @@ LANGUAGE js AS "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", write={ "bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", - "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", "presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))", - "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", - "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", + "hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", + "spark": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", }, ) self.validate_all( @@ -1257,6 +1274,13 @@ LANGUAGE js AS }, ) self.validate_all( + "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", + write={ + "bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", + "duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))", + }, + ) + self.validate_all( "CURRENT_DATE('UTC')", write={ "mysql": "CURRENT_DATE AT TIME ZONE 'UTC'", @@ -1402,6 +1426,57 @@ WHERE "": "SELECT LENGTH(foo)", }, ) + self.validate_all( + "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", + write={ + "duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))", + "bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", + }, + ) + self.validate_all( + "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", + write={ + "bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", + "duckdb": "ARRAY_CONCAT([1, 2], ARRAY_CONCAT([3, 4], [5, 6]))", + "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))", + "redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))", + "snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))", + "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", + "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", + "trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", + }, + ) + self.validate_all( + "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')", + write={ + "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])", + "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)", + }, + ) + self.validate_all( + "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", + write={ + "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])", + "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", + }, + ) + self.validate_all( + "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", + write={ + "duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)", + "bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", + }, + ) + self.validate_all( + "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", + write={ + "bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", + "duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)", + }, + ) def test_errors(self): with self.assertRaises(TokenError): @@ -1794,14 +1869,14 @@ OPTIONS ( "SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])", write={ "bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY<STRUCT<x INT64>>))", - "duckdb": "SELECT * FROM UNNEST(CAST([] AS STRUCT(x BIGINT)[]))", + "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])", write={ "bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>))", - "duckdb": "SELECT * FROM UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]))", + "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))", }, ) self.validate_all( @@ -1811,3 +1886,51 @@ OPTIONS ( "duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))", }, ) + + def test_convert(self): + for value, expected in [ + (datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"), + (datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"), + ( + datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc), + "CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)", + ), + ( + pytz.timezone("America/Los_Angeles").localize( + datetime.datetime(2023, 1, 1, 12, 13, 14) + ), + "CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)", + ), + ]: + with self.subTest(value): + self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected) + + def test_unnest(self): + self.validate_all( + "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", + write={ + "bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", + "duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))", + }, + ) + self.validate_all( + "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", + write={ + "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", + "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))", + }, + ) + self.validate_all( + "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + write={ + "bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + "duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", + }, + ) + self.validate_all( + "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + write={ + "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", + "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", + }, + ) diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index 2356ad0..53cd0a9 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -1,4 +1,7 @@ +from datetime import date from sqlglot import exp, parse_one +from sqlglot.dialects import ClickHouse +from sqlglot.expressions import convert from tests.dialects.test_dialect import Validator from sqlglot.errors import ErrorLevel @@ -7,27 +10,27 @@ class TestClickhouse(Validator): dialect = "clickhouse" def test_clickhouse(self): - self.validate_identity("SELECT toFloat(like)") - self.validate_identity("SELECT like") + for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING: + self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)") - string_types = [ - "BLOB", - "LONGBLOB", - "LONGTEXT", - "MEDIUMBLOB", - "MEDIUMTEXT", - "TINYBLOB", - "TINYTEXT", - "VARCHAR(255)", - ] + # Arrays, maps and tuples can't be Nullable in ClickHouse + for non_nullable_type in ("ARRAY<INT>", "MAP<INT, INT>", "STRUCT(a: INT)"): + try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})") + target_type = try_cast.to.sql("clickhouse") + self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})") - for string_type in string_types: - self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)") + for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"): + try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})") + target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse") + self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))") expr = parse_one("count(x)") self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)") self.assertIsNone(expr._meta) + self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var) + self.validate_identity("SELECT toFloat(like)") + self.validate_identity("SELECT like") self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)") self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')") self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))") @@ -40,7 +43,7 @@ class TestClickhouse(Validator): self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)") self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)") self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000") - self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))") + self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))") self.validate_identity("CAST(x AS Enum('hello', 'world'))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))") @@ -79,7 +82,8 @@ class TestClickhouse(Validator): self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)") self.validate_identity("position(haystack, needle)") self.validate_identity("position(haystack, needle, position)") - self.validate_identity("CAST(x AS DATETIME)") + self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)") + self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)") self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)") self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src") self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""") @@ -207,11 +211,16 @@ class TestClickhouse(Validator): }, ) self.validate_all( - "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND", + "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", read={ "duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", "postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", }, + write={ + "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", + "duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND", + "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'", + }, ) self.validate_all( "SELECT CURRENT_DATE()", @@ -471,6 +480,47 @@ class TestClickhouse(Validator): parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType ) + self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)") + self.validate_all( + "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + read={ + # looks like values table function, but should be parsed as VALUES block + "clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)" + }, + write={ + "clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + }, + ) + + def test_clickhouse_values(self): + values = exp.select("*").from_( + exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"]) + ) + self.assertEqual( + values.sql("clickhouse"), + "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq", + ) + + self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)") + self.validate_identity( + "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)", + "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + ) + + self.validate_all( + "SELECT col FROM (SELECT 1 AS col) AS _t", + read={ + "duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)", + }, + ) + self.validate_all( + "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t", + read={ + "duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)", + }, + ) + def test_cte(self): self.validate_identity("WITH 'x' AS foo SELECT foo") self.validate_identity("WITH ['c'] AS field_names SELECT field_names") @@ -531,7 +581,7 @@ class TestClickhouse(Validator): self.validate_all( "SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", write={ - "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", + "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", "": "SELECT :abc, :b, :c, :d, :e", }, ) @@ -562,15 +612,78 @@ class TestClickhouse(Validator): ) def test_ddl(self): + db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None) + create_with_cluster = exp.Create( + this=db_table_expr, + kind="DATABASE", + properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), + ) + self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + + ctas_with_comment = exp.Create( + this=exp.table_("foo"), + kind="TABLE", + expression=exp.select("*").from_("db.other_table"), + properties=exp.Properties( + expressions=[ + exp.EngineProperty(this=exp.var("Memory")), + exp.SchemaCommentProperty(this=exp.Literal.string("foo")), + ], + ), + ) + self.assertEqual( + ctas_with_comment.sql("clickhouse"), + "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'", + ) + + self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") + self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") + self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))") + self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""") + self.validate_identity( + "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" + ) + self.validate_identity( + "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" + ) + self.validate_identity( + """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'""" + ) + self.validate_identity( + "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3" + ) self.validate_identity( 'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)' ) self.validate_identity( "CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'" ) - self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))") + self.validate_identity( + "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'", + "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'", + ) self.validate_all( + "CREATE DATABASE x", + read={ + "duckdb": "CREATE SCHEMA x", + }, + write={ + "clickhouse": "CREATE DATABASE x", + "duckdb": "CREATE SCHEMA x", + }, + ) + self.validate_all( + "DROP DATABASE x", + read={ + "duckdb": "DROP SCHEMA x", + }, + write={ + "clickhouse": "DROP DATABASE x", + "duckdb": "DROP SCHEMA x", + }, + ) + self.validate_all( """ CREATE TABLE example1 ( timestamp DateTime, @@ -583,7 +696,7 @@ class TestClickhouse(Validator): """, write={ "clickhouse": """CREATE TABLE example1 ( - timestamp DATETIME, + timestamp DateTime, x UInt32 TTL now() + INTERVAL '1' MONTH, y String TTL timestamp + INTERVAL '1' DAY, z String @@ -661,7 +774,7 @@ SETTINGS """, write={ "clickhouse": """CREATE TABLE example_table ( - d DATETIME, + d DateTime, a Int32 ) ENGINE=MergeTree @@ -688,7 +801,7 @@ TTL """, write={ "clickhouse": """CREATE TABLE table_with_where ( - d DATETIME, + d DateTime, a Int32 ) ENGINE=MergeTree @@ -716,7 +829,7 @@ WHERE """, write={ "clickhouse": """CREATE TABLE table_for_recompression ( - d DATETIME, + d DateTime, key UInt64, value String ) @@ -748,7 +861,7 @@ SETTINGS """, write={ "clickhouse": """CREATE TABLE table_for_aggregation ( - d DATETIME, + d DateTime, k1 Int32, k2 Int32, x Int32, @@ -855,8 +968,6 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) - self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") - self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") self.validate_all( """ CREATE TABLE t ( @@ -873,12 +984,6 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) - self.validate_identity( - "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" - ) - self.validate_identity( - "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" - ) def test_agg_functions(self): def extract_agg_func(query): @@ -934,3 +1039,8 @@ LIFETIME(MIN 0 MAX 0)""", f"SELECT {func_alias}(SECOND, 1, bar)", f"SELECT {func_name}(SECOND, 1, bar)", ) + + def test_convert(self): + self.assertEqual( + convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')" + ) diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py index 90499fb..a46794b 100644 --- a/tests/dialects/test_databricks.py +++ b/tests/dialects/test_databricks.py @@ -256,3 +256,11 @@ class TestDatabricks(Validator): "databricks": "WITH x AS (SELECT 1) SELECT * FROM x", }, ) + + def test_streaming_tables(self): + self.validate_identity( + "CREATE STREAMING TABLE raw_data AS SELECT * FROM STREAM READ_FILES('abfss://container@storageAccount.dfs.core.windows.net/base/path')" + ) + self.validate_identity( + "CREATE OR REFRESH STREAMING TABLE csv_data (id INT, ts TIMESTAMP, event STRING) AS SELECT * FROM STREAM READ_FILES('s3://bucket/path', format => 'csv', schema => 'id int, ts timestamp, event string')" + ) diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 522c42c..8f3c183 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -160,7 +160,7 @@ class TestDialect(Validator): "CAST(a AS TEXT)", write={ "bigquery": "CAST(a AS STRING)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS TEXT)", @@ -181,7 +181,7 @@ class TestDialect(Validator): "CAST(a AS BINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS BINARY(4))", + "clickhouse": "CAST(a AS Nullable(BINARY(4)))", "drill": "CAST(a AS VARBINARY(4))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", @@ -201,7 +201,7 @@ class TestDialect(Validator): "CAST(a AS VARBINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", - "clickhouse": "CAST(a AS String)", + "clickhouse": "CAST(a AS Nullable(String))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", "mysql": "CAST(a AS VARBINARY(4))", @@ -219,19 +219,19 @@ class TestDialect(Validator): self.validate_all( "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))", write={ - "clickhouse": "CAST(map('a', '1') AS Map(String, String))", + "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))", }, ) self.validate_all( "CAST(ARRAY(1, 2) AS ARRAY<TINYINT>)", write={ - "clickhouse": "CAST([1, 2] AS Array(Int8))", + "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))", }, ) self.validate_all( - "CAST((1, 2) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)", + "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)", write={ - "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))", + "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))", }, ) self.validate_all( @@ -328,19 +328,9 @@ class TestDialect(Validator): "redshift": "CAST(a AS DOUBLE PRECISION)", }, write={ - "duckdb": "CAST(a AS DOUBLE)", - "drill": "CAST(a AS DOUBLE)", - "postgres": "CAST(a AS DOUBLE PRECISION)", - "redshift": "CAST(a AS DOUBLE PRECISION)", - "doris": "CAST(a AS DOUBLE)", - }, - ) - - self.validate_all( - "CAST(a AS DOUBLE)", - write={ "bigquery": "CAST(a AS FLOAT64)", - "clickhouse": "CAST(a AS Float64)", + "clickhouse": "CAST(a AS Nullable(Float64))", + "doris": "CAST(a AS DOUBLE)", "drill": "CAST(a AS DOUBLE)", "duckdb": "CAST(a AS DOUBLE)", "materialize": "CAST(a AS DOUBLE PRECISION)", @@ -592,7 +582,7 @@ class TestDialect(Validator): "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%T')", "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')", - "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", + "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, ) @@ -647,8 +637,18 @@ class TestDialect(Validator): self.validate_all( "TIME_STR_TO_TIME('2020-01-01')", write={ - "drill": "CAST('2020-01-01' AS TIMESTAMP)", + "bigquery": "CAST('2020-01-01' AS DATETIME)", + "databricks": "CAST('2020-01-01' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", + "tsql": "CAST('2020-01-01' AS DATETIME2)", + "mysql": "CAST('2020-01-01' AS DATETIME)", + "postgres": "CAST('2020-01-01' AS TIMESTAMP)", + "redshift": "CAST('2020-01-01' AS TIMESTAMP)", + "snowflake": "CAST('2020-01-01' AS TIMESTAMP)", + "spark": "CAST('2020-01-01' AS TIMESTAMP)", + "trino": "CAST('2020-01-01' AS TIMESTAMP)", + "clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))", + "drill": "CAST('2020-01-01' AS TIMESTAMP)", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "presto": "CAST('2020-01-01' AS TIMESTAMP)", "sqlite": "'2020-01-01'", @@ -656,6 +656,48 @@ class TestDialect(Validator): }, ) self.validate_all( + "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')", + write={ + "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", + "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", + "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "'2020-01-01 12:13:14-08:00'", + "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + }, + ) + self.validate_all( + "TIME_STR_TO_TIME(col, 'America/Los_Angeles')", + write={ + "bigquery": "CAST(col AS TIMESTAMP)", + "databricks": "CAST(col AS TIMESTAMP)", + "duckdb": "CAST(col AS TIMESTAMPTZ)", + "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", + "mysql": "CAST(col AS DATETIME)", + "postgres": "CAST(col AS TIMESTAMPTZ)", + "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "snowflake": "CAST(col AS TIMESTAMPTZ)", + "spark": "CAST(col AS TIMESTAMP)", + "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))", + "drill": "CAST(col AS TIMESTAMP)", + "hive": "CAST(col AS TIMESTAMP)", + "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)", + "sqlite": "col", + "doris": "CAST(col AS DATETIME)", + }, + ) + self.validate_all( "TIME_STR_TO_UNIX('2020-01-01')", write={ "duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))", @@ -2115,6 +2157,17 @@ SELECT }, ) + # needs to preserve the target alias in then WHEN condition but not in the THEN clause + self.validate_all( + """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a + WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO' + WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""", + write={ + "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", + }, + ) + def test_substring(self): self.validate_all( "SUBSTR('123456', 2, 3)", @@ -2603,3 +2656,46 @@ FROM subquery2""", "trino": f"SELECT {pad_func}('bar', 5, ' ')", }, ) + + def test_generate_date_array(self): + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))", + write={ + "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))", + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series", + "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))", + "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", + "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates", + }, + ) + self.validate_all( + "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", + }, + ) + self.validate_all( + "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)", + write={ + "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", + "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)", + "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates", + }, + ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 15af086..3fb9930 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -8,8 +8,6 @@ class TestDuckDB(Validator): dialect = "duckdb" def test_duckdb(self): - self.validate_identity("x::int[3]", "CAST(x AS INT[3])") - with self.assertRaises(ParseError): parse_one("1 //", read="duckdb") @@ -35,31 +33,6 @@ class TestDuckDB(Validator): }, ) self.validate_all( - "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", - read={ - "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", - "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)", - }, - ) - self.validate_all( - "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", - read={ - "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", - "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", - }, - ) - self.validate_all( - "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", - read={ - "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", - "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", - }, - write={ - "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", - "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", - }, - ) - self.validate_all( 'STRUCT_PACK("a b" := 1)', write={ "duckdb": "{'a b': 1}", @@ -112,7 +85,9 @@ class TestDuckDB(Validator): self.validate_all( "CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)", - read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"}, + read={ + "bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)", + }, ) self.validate_identity("SELECT 1 WHERE x > $1") self.validate_identity("SELECT 1 WHERE x > $name") @@ -128,13 +103,17 @@ class TestDuckDB(Validator): ) self.validate_all( - "{'a': 1, 'b': '2'}", write={"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))"} + "{'a': 1, 'b': '2'}", + write={ + "presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))", + }, ) self.validate_all( "struct_pack(a := 1, b := 2)", - write={"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))"}, + write={ + "presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))", + }, ) - self.validate_all( "struct_pack(a := 1, b := x)", write={ @@ -818,6 +797,9 @@ class TestDuckDB(Validator): ) self.validate_identity("SELECT LENGTH(foo)") + self.validate_identity("SELECT ARRAY[1, 2, 3]", "SELECT [1, 2, 3]") + + self.validate_identity("SELECT * FROM (DESCRIBE t)") def test_array_index(self): with self.assertLogs(helper_logger) as cm: @@ -909,12 +891,12 @@ class TestDuckDB(Validator): "EPOCH_MS(x)", write={ "bigquery": "TIMESTAMP_MILLIS(x)", + "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))", "duckdb": "EPOCH_MS(x)", + "mysql": "FROM_UNIXTIME(x / POWER(10, 3))", + "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)", "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))", "spark": "TIMESTAMP_MILLIS(x)", - "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Int64))", - "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)", - "mysql": "FROM_UNIXTIME(x / POWER(10, 3))", }, ) self.validate_all( @@ -958,7 +940,7 @@ class TestDuckDB(Validator): self.validate_all( "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')", write={ - "bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)", + "bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)", "duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')", "presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)", @@ -1023,6 +1005,7 @@ class TestDuckDB(Validator): self.validate_identity("ARRAY((SELECT id FROM t))") def test_cast(self): + self.validate_identity("x::int[3]", "CAST(x AS INT[3])") self.validate_identity("CAST(x AS REAL)") self.validate_identity("CAST(x AS UINTEGER)") self.validate_identity("CAST(x AS UBIGINT)") @@ -1076,6 +1059,39 @@ class TestDuckDB(Validator): "STRUCT_PACK(a := 'b')::STRUCT(a TEXT)", "CAST(ROW('b') AS STRUCT(a TEXT))", ) + + self.validate_all( + "CAST(x AS TIME)", + read={ + "duckdb": "CAST(x AS TIME)", + "presto": "CAST(x AS TIME(6))", + }, + ) + self.validate_all( + "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", + read={ + "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", + "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)", + }, + ) + self.validate_all( + "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + read={ + "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", + }, + ) + self.validate_all( + "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", + read={ + "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", + "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", + }, + write={ + "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", + "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", + }, + ) self.validate_all( "CAST(x AS VARCHAR(5))", write={ @@ -1156,6 +1172,12 @@ class TestDuckDB(Validator): }, ) + self.validate_identity("SELECT x::INT[3][3]", "SELECT CAST(x AS INT[3][3])") + self.validate_identity( + """SELECT ARRAY[[[1]]]::INT[1][1][1]""", + """SELECT CAST([[[1]]] AS INT[1][1][1])""", + ) + def test_encode_decode(self): self.validate_all( "ENCODE(x)", diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 65ea0e5..c2768bf 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -171,6 +171,16 @@ class TestHive(Validator): self.validate_identity( """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')""" ) + self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2") + self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2") + self.validate_identity( + "ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1" + ) + self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2") + self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')") + self.validate_identity( + "ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True + ) def test_lateral_view(self): self.validate_all( diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 65b4c4a..c598cb5 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -24,6 +24,18 @@ class TestMySQL(Validator): self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)") self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)") self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT") + self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo") + self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100") + self.validate_identity( + "ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True + ) + self.validate_identity( + "ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo", + check_command_warning=True, + ) + self.validate_identity( + "ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True + ) self.validate_identity( "INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b" ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 1d9fd99..4813a49 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -6,6 +6,7 @@ class TestOracle(Validator): dialect = "oracle" def test_oracle(self): + self.validate_identity("1 /* /* */") self.validate_all( "SELECT CONNECT_BY_ROOT x y", write={ @@ -13,8 +14,9 @@ class TestOracle(Validator): "oracle": "SELECT CONNECT_BY_ROOT x AS y", }, ) - self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.AlterTable) + self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.Alter) + self.validate_identity("SYSDATE") self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("REGEXP_REPLACE('source', 'search')") @@ -44,6 +46,9 @@ class TestOracle(Validator): self.validate_identity("SELECT * FROM V$SESSION") self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')") self.validate_identity( + "SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))" + ) + self.validate_identity( "SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name" ) self.validate_identity( @@ -73,10 +78,6 @@ class TestOracle(Validator): "SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30", ) self.validate_identity( - "SYSDATE", - "CURRENT_TIMESTAMP", - ) - self.validate_identity( "SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) " 'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) ' 'OVER (PARTITION BY department_id) AS "Best" FROM employees ORDER BY department_id, salary, last_name' @@ -88,7 +89,6 @@ class TestOracle(Validator): self.validate_identity( "SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY", ) - self.validate_identity("NVL(x, y)").assert_is(exp.Anonymous) self.validate_identity( "SELECT * FROM t SAMPLE (.25)", "SELECT * FROM t SAMPLE (0.25)", @@ -99,6 +99,16 @@ class TestOracle(Validator): ) self.validate_all( + "SELECT * FROM test WHERE MOD(col1, 4) = 3", + read={ + "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", + }, + write={ + "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", + "oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3", + }, + ) + self.validate_all( "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", read={ "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", @@ -242,6 +252,15 @@ class TestOracle(Validator): """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""", ) + self.validate_all( + "NVL(NULL, 1)", + write={ + "oracle": "NVL(NULL, 1)", + "": "COALESCE(NULL, 1)", + "clickhouse": "COALESCE(NULL, 1)", + }, + ) + def test_join_marker(self): self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 5df5e31..075c274 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -28,7 +28,7 @@ class TestPostgres(Validator): alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION""" expr = self.parse_one(alter_table_only) - self.assertIsInstance(expr, exp.AlterTable) + self.assertIsInstance(expr, exp.Alter) self.assertEqual(expr.sql(dialect="postgres"), alter_table_only) self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')") @@ -549,46 +549,53 @@ class TestPostgres(Validator): }, ) self.validate_all( + "SELECT GENERATE_SERIES(1, 5)", + write={ + "bigquery": UnsupportedError, + "postgres": "SELECT GENERATE_SERIES(1, 5)", + }, + ) + self.validate_all( + "WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + write={ + "duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + "postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", + }, + ) + self.validate_all( "GENERATE_SERIES(a, b, ' 2 days ')", write={ "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')", - "presto": "SEQUENCE(a, b, INTERVAL '2' DAY)", - "trino": "SEQUENCE(a, b, INTERVAL '2' DAY)", + "presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", + "trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", }, ) self.validate_all( "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')", write={ + "databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')", - "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", - "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", - "hive": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "spark2": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "spark": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", - "databricks": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)", + "presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", + "spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", + "trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", }, ) self.validate_all( - "GENERATE_SERIES(a, b)", + "SELECT * FROM GENERATE_SERIES(a, b)", read={ - "postgres": "GENERATE_SERIES(a, b)", - "presto": "SEQUENCE(a, b)", - "trino": "SEQUENCE(a, b)", - "tsql": "GENERATE_SERIES(a, b)", - "hive": "SEQUENCE(a, b)", - "spark2": "SEQUENCE(a, b)", - "spark": "SEQUENCE(a, b)", - "databricks": "SEQUENCE(a, b)", + "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, write={ - "postgres": "GENERATE_SERIES(a, b)", - "presto": "SEQUENCE(a, b)", - "trino": "SEQUENCE(a, b)", - "tsql": "GENERATE_SERIES(a, b)", - "hive": "SEQUENCE(a, b)", - "spark2": "SEQUENCE(a, b)", - "spark": "SEQUENCE(a, b)", - "databricks": "SEQUENCE(a, b)", + "databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "postgres": "SELECT * FROM GENERATE_SERIES(a, b)", + "presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))", + "spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", + "trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))", + "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, ) self.validate_all( diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index f89b8e8..950c89f 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -14,6 +14,13 @@ class TestPresto(Validator): self.validate_identity("CAST(x AS HYPERLOGLOG)") self.validate_all( + "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')", + write={ + "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)", + "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')", + }, + ) + self.validate_all( "CAST(x AS INTERVAL YEAR TO MONTH)", write={ "oracle": "CAST(x AS INTERVAL YEAR TO MONTH)", diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index c4e7073..c13b61a 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -6,7 +6,6 @@ class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): - self.validate_identity("1 div", "1 AS div") self.validate_all( "SELECT SPLIT_TO_ARRAY('12,345,6789')", write={ @@ -315,6 +314,7 @@ class TestRedshift(Validator): ) def test_identity(self): + self.validate_identity("1 div", "1 AS div") self.validate_identity("LISTAGG(DISTINCT foo, ', ')") self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") @@ -338,6 +338,10 @@ class TestRedshift(Validator): """SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)""" ) self.validate_identity( + 'DATE_PART(year, "somecol")', + 'EXTRACT(year FROM "somecol")', + ).this.assert_is(exp.Var) + self.validate_identity( "SELECT CONCAT('abc', 'def')", "SELECT 'abc' || 'def'", ) @@ -430,6 +434,14 @@ ORDER BY ) self.validate_identity("SELECT JSON_PARSE('[]')") + self.validate_identity("SELECT ARRAY(1, 2, 3)") + self.validate_identity("SELECT ARRAY[1, 2, 3]") + + self.validate_identity( + """SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""", + """SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""", + ) + def test_values(self): # Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError values = [str(v) for v in range(0, 10000)] @@ -608,3 +620,9 @@ FROM ( "select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)", "SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)", ) + + def test_time(self): + self.validate_all( + "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", + write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"}, + ) diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 3686de5..d01101e 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -11,6 +11,7 @@ class TestSnowflake(Validator): dialect = "snowflake" def test_snowflake(self): + self.validate_identity("1 /* /* */") self.validate_identity( "SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table" ) @@ -59,6 +60,7 @@ WHERE )""", ) + self.validate_identity("exclude := [foo]") self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))") self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias") self.validate_identity("SELECT number").selects[0].assert_is(exp.Column) @@ -114,6 +116,18 @@ WHERE self.validate_identity("SELECT MATCH_CONDITION") self.validate_identity("SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t") self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY ORDER BY quarter)) ORDER BY empid" + ) + self.validate_identity( + "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY)) ORDER BY empid" + ) + self.validate_identity( "MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)" ) self.validate_identity( @@ -126,6 +140,18 @@ WHERE "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID" ) self.validate_identity( + "SELECT * FROM s WHERE c NOT IN (1, 2, 3)", + "SELECT * FROM s WHERE NOT c IN (1, 2, 3)", + ) + self.validate_identity( + "SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)", + "SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)", + ) + self.validate_identity( + "SELECT * FROM t1 INNER JOIN t2 USING (t1.col)", + "SELECT * FROM t1 INNER JOIN t2 USING (col)", + ) + self.validate_identity( "CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)", "CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)", ) @@ -847,6 +873,33 @@ WHERE }, ) + self.validate_identity( + """SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""", + """SELECT CAST(['foo'] AS VARIANT)[0]""", + ) + + self.validate_all( + "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + write={ + "snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + "redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", + }, + ) + + self.validate_all( + "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + write={ + "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", + "mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')", + "duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'", + }, + ) + def test_null_treatment(self): self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", @@ -1921,7 +1974,7 @@ STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""", def test_swap(self): ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake") - assert isinstance(ast, exp.AlterTable) + assert isinstance(ast, exp.Alter) assert isinstance(ast.args["actions"][0], exp.SwapTable) def test_try_cast(self): diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index fc3146f..a2d87df 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -129,6 +129,16 @@ TBLPROPERTIES ( "spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)", }, ) + self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo") + self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo") + self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed") + self.validate_identity( + "ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')" + ) + self.validate_identity( + "ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')", + check_command_warning=True, + ) def test_to_date(self): self.validate_all( @@ -298,6 +308,13 @@ TBLPROPERTIES ( ) self.validate_all( + "SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday", + write={ + "presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday", + "spark": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), 'EEEE') AS weekday", + }, + ) + self.validate_all( "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", read={ "databricks": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", @@ -557,7 +574,10 @@ TBLPROPERTIES ( ) self.validate_all( - "CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"} + "CAST(x AS TIMESTAMP)", + read={ + "trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)", + }, ) self.validate_all( "SELECT DATE_ADD(my_date_column, 1)", @@ -688,6 +708,7 @@ TBLPROPERTIES ( "trino": "SELECT DATE_ADD('MONTH', 20, col)", }, ) + self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')") def test_bool_or(self): self.validate_all( @@ -805,8 +826,22 @@ TBLPROPERTIES ( self.assertEqual(query.sql(name), without_modifiers) def test_schema_binding_options(self): - for schema_binding in ("BINDING", "COMPENSATION", "TYPE EVOLUTION", "EVOLUTION"): + for schema_binding in ( + "BINDING", + "COMPENSATION", + "TYPE EVOLUTION", + "EVOLUTION", + ): with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"): self.validate_identity( f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp" ) + + def test_minus(self): + self.validate_all( + "SELECT * FROM db.table1 MINUS SELECT * FROM db.table2", + write={ + "spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", + "databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", + }, + ) diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index 3945ca3..598cb53 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -1,3 +1,4 @@ +from sqlglot import exp from tests.dialects.test_dialect import Validator @@ -31,6 +32,10 @@ class TestTeradata(Validator): }, ) + self.validate_identity( + "RENAME TABLE emp TO employee", check_command_warning=True + ).assert_is(exp.Command) + def test_translate(self): self.validate_all( "TRANSLATE(x USING LATIN_TO_UNICODE)", diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py index ccc1407..0ebe749 100644 --- a/tests/dialects/test_trino.py +++ b/tests/dialects/test_trino.py @@ -16,3 +16,35 @@ class TestTrino(Validator): "SELECT TRIM('!foo!', '!')", "SELECT TRIM('!' FROM '!foo!')", ) + + def test_ddl(self): + self.validate_identity("ALTER TABLE users RENAME TO people") + self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people") + self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR") + self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR") + self.validate_identity("ALTER TABLE users DROP COLUMN zip") + self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip") + self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id") + self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id") + self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT") + self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL") + self.validate_identity( + "ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456", + check_command_warning=True, + ) + self.validate_identity( + "ALTER TABLE people SET PROPERTIES x = DEFAULT", check_command_warning=True + ) + self.validate_identity("ALTER VIEW people RENAME TO users") + self.validate_identity( + "ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True + ) diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index cfdcb8d..ad18981 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -792,7 +792,7 @@ class TestTSQL(Validator): self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x") self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is( - exp.AlterTable + exp.Alter ).args["actions"][0].assert_is(exp.Drop) for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"): @@ -822,6 +822,20 @@ class TestTSQL(Validator): self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=ON") self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=OFF") + self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo") + self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100") + self.validate_identity( + "ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) + self.validate_identity( + "ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) + self.validate_identity( + "ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100", + check_command_warning=True, + ) self.validate_identity( "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7 END", "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7 END", @@ -1513,6 +1527,15 @@ WHERE }, ) + # Check superfluous casts arent added. ref: https://github.com/TobikoData/sqlmesh/issues/2672 + self.validate_all( + "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo", + write={ + "tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo", + "clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo", + }, + ) + def test_lateral_subquery(self): self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)", @@ -1650,7 +1673,7 @@ WHERE }, write={ "bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", - "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))", "mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))", "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", @@ -1665,7 +1688,7 @@ WHERE "EOMONTH(GETDATE(), -1)", write={ "bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))", - "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)", "mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))", "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql index 7892adb..20cbe7f 100644 --- a/tests/fixtures/identity.sql +++ b/tests/fixtures/identity.sql @@ -776,6 +776,10 @@ ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN'), PARTITION( ALTER TABLE mydataset.mytable DELETE WHERE x = 1 ALTER TABLE table1 RENAME COLUMN c1 TO c2 ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2 +ALTER TABLE table1 RENAME TO table2 +ALTER VIEW view1 AS SELECT a, b, c FROM table1 +ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 +ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 LIMIT 100 SELECT div.a FROM test_table AS div WITH view AS (SELECT 1 AS x) SELECT * FROM view ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>> @@ -872,4 +876,5 @@ SELECT cube, cube.x FROM cube SELECT * FROM a STRAIGHT_JOIN b SELECT COUNT(DISTINCT "foo bar") FROM (SELECT 1 AS "foo bar") AS t SELECT vector -WITH all AS (SELECT 1 AS count) SELECT all.count FROM all
\ No newline at end of file +WITH all AS (SELECT 1 AS count) SELECT all.count FROM all +SELECT rename
\ No newline at end of file diff --git a/tests/fixtures/optimizer/annotate_types.sql b/tests/fixtures/optimizer/annotate_types.sql index 589f0ec..0a5fc22 100644 --- a/tests/fixtures/optimizer/annotate_types.sql +++ b/tests/fixtures/optimizer/annotate_types.sql @@ -15,6 +15,13 @@ bool; null; null; + +null and false; +bool; + +null + 1; +int; + CASE WHEN x THEN NULL ELSE 1 END; INT; diff --git a/tests/fixtures/optimizer/canonicalize.sql b/tests/fixtures/optimizer/canonicalize.sql index e4c78b7..66c6c95 100644 --- a/tests/fixtures/optimizer/canonicalize.sql +++ b/tests/fixtures/optimizer/canonicalize.sql @@ -52,6 +52,10 @@ SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE CASE WHEN COALESCE("x"."b" <> 0, 1 < DATE('2023-01-01'); CAST('2023-01-01' AS DATE); +-- Some dialects only allow dates +DATE('2023-01-01 00:00:00'); +DATE('2023-01-01 00:00:00'); + TIMESTAMP('2023-01-01'); CAST('2023-01-01' AS TIMESTAMP); diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 444a8e0..7d58908 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -343,6 +343,11 @@ WITH tbl1 AS (SELECT STRUCT(1 AS col1, Struct(5 AS col1)) AS col) SELECT tbl1.co WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1; WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1 AS tbl1; +# title: CSV files are not scanned by default +# execute: false +SELECT * FROM READ_CSV('file.csv'); +SELECT * FROM READ_CSV('file.csv') AS _q_0; + -------------------------------------- -- CTEs -------------------------------------- @@ -655,6 +660,16 @@ SELECT x.a + x.b AS f, x.a + x.b AS _col_1, x.a + x.b + 5 AS _col_2 FROM x AS x; SELECT a, SUM(b) AS c, SUM(c) OVER(PARTITION BY a) AS d from x group by 1 ORDER BY a; SELECT x.a AS a, SUM(x.b) AS c, SUM(SUM(x.b)) OVER (PARTITION BY x.a) AS d FROM x AS x GROUP BY x.a ORDER BY a; +# title: we can't expand aliases corresponding to recursive CTE columns (CTE names output columns) +# execute: false +WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t; +WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t; + +# title: we can't expand aliases corresponding to recursive CTE columns (CTE doesn't name output columns) +# execute: false +WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t; +WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t; + -------------------------------------- -- Wrapped tables / join constructs -------------------------------------- diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 37a9720..9bb00de 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -1,3 +1,4 @@ +import sys import datetime import math import unittest @@ -431,6 +432,31 @@ class TestExpressions(unittest.TestCase): table = expression.find(exp.Table) self.assertEqual(table.alias_column_names, ["a", "b"]) + def test_cast(self): + expression = parse_one("select cast(x as DATE)") + casts = list(expression.find_all(exp.Cast)) + self.assertEqual(len(casts), 1) + + cast = casts[0] + self.assertTrue(cast.to.is_type(exp.DataType.Type.DATE)) + + # check that already cast values arent re-cast if wrapped in a cast to the same type + recast = exp.cast(cast, to=exp.DataType.Type.DATE) + self.assertEqual(recast, cast) + self.assertEqual(recast.sql(), "CAST(x AS DATE)") + + # however, recasting is fine if the types are different + recast = exp.cast(cast, to=exp.DataType.Type.VARCHAR) + self.assertNotEqual(recast, cast) + self.assertEqual(len(list(recast.find_all(exp.Cast))), 2) + self.assertEqual(recast.sql(), "CAST(CAST(x AS DATE) AS VARCHAR)") + + # check that dialect is used when casting strings + self.assertEqual( + exp.cast("x", to="regtype", dialect="postgres").sql(), "CAST(x AS REGTYPE)" + ) + self.assertEqual(exp.cast("`x`", to="date", dialect="hive").sql(), 'CAST("x" AS DATE)') + def test_ctes(self): expression = parse_one("SELECT a FROM x") self.assertEqual(expression.ctes, []) @@ -657,7 +683,10 @@ class TestExpressions(unittest.TestCase): self.assertIsInstance(parse_one("TIME_TO_TIME_STR(a)"), exp.Cast) self.assertIsInstance(parse_one("TIME_TO_UNIX(a)"), exp.TimeToUnix) self.assertIsInstance(parse_one("TIME_STR_TO_DATE(a)"), exp.TimeStrToDate) - self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime) + (self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime),) + self.assertIsInstance( + parse_one("TIME_STR_TO_TIME(a, 'America/Los_Angeles')"), exp.TimeStrToTime + ) self.assertIsInstance(parse_one("TIME_STR_TO_UNIX(a)"), exp.TimeStrToUnix) self.assertIsInstance(parse_one("TRIM(LEADING 'b' FROM 'bla')"), exp.Trim) self.assertIsInstance(parse_one("TS_OR_DS_ADD(a, 1, 'day')"), exp.TsOrDsAdd) @@ -791,6 +820,7 @@ class TestExpressions(unittest.TestCase): def test_convert(self): from collections import namedtuple + import pytz PointTuple = namedtuple("Point", ["x", "y"]) @@ -809,11 +839,17 @@ class TestExpressions(unittest.TestCase): ({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"), ( datetime.datetime(2022, 10, 1, 1, 1, 1, 1), - "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001+00:00')", + "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001')", ), ( datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc), - "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00')", + "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00', 'UTC')", + ), + ( + pytz.timezone("America/Los_Angeles").localize( + datetime.datetime(2022, 10, 1, 1, 1, 1) + ), + "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')", ), (datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"), (math.nan, "NULL"), @@ -829,6 +865,21 @@ class TestExpressions(unittest.TestCase): "MAP_FROM_ARRAYS(ARRAY('test'), ARRAY('value'))", ) + @unittest.skipUnless(sys.version_info >= (3, 9), "zoneinfo only available from python 3.9+") + def test_convert_python39(self): + import zoneinfo + + for value, expected in [ + ( + datetime.datetime( + 2022, 10, 1, 1, 1, 1, tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles") + ), + "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')", + ) + ]: + with self.subTest(value): + self.assertEqual(exp.convert(value).sql(), expected) + def test_comment_alias(self): sql = """ SELECT @@ -993,16 +1044,15 @@ FROM foo""", self.assertEqual(exp.DataType.build("UNKNOWN", dialect="bigquery").sql(), "UNKNOWN") self.assertEqual(exp.DataType.build("UNKNOWN", dialect="snowflake").sql(), "UNKNOWN") self.assertEqual(exp.DataType.build("TIMESTAMP", dialect="bigquery").sql(), "TIMESTAMPTZ") - self.assertEqual( - exp.DataType.build("struct<x int>", dialect="spark").sql(), "STRUCT<x INT>" - ) self.assertEqual(exp.DataType.build("USER-DEFINED").sql(), "USER-DEFINED") - self.assertEqual(exp.DataType.build("ARRAY<UNKNOWN>").sql(), "ARRAY<UNKNOWN>") self.assertEqual(exp.DataType.build("ARRAY<NULL>").sql(), "ARRAY<NULL>") self.assertEqual(exp.DataType.build("varchar(100) collate 'en-ci'").sql(), "VARCHAR(100)") self.assertEqual(exp.DataType.build("int[3]").sql(dialect="duckdb"), "INT[3]") self.assertEqual(exp.DataType.build("int[3][3]").sql(dialect="duckdb"), "INT[3][3]") + self.assertEqual( + exp.DataType.build("struct<x int>", dialect="spark").sql(), "STRUCT<x INT>" + ) with self.assertRaises(ParseError): exp.DataType.build("varchar(") @@ -1107,6 +1157,10 @@ FROM foo""", dtype = exp.DataType.build("a.b.c", udt=True) assert dtype.is_type("a.b.c") + dtype = exp.DataType.build("Nullable(Int32)", dialect="clickhouse") + assert dtype.is_type("int") + assert not dtype.is_type("int", check_nullable=True) + with self.assertRaises(ParseError): exp.DataType.build("foo") diff --git a/tests/test_generator.py b/tests/test_generator.py index a5945b2..e21aa25 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -1,3 +1,4 @@ +import time import unittest from sqlglot import exp, parse_one @@ -43,3 +44,10 @@ class TestGenerator(unittest.TestCase): assert parse_one("X").sql(identify="safe") == "X" assert parse_one("x as 1").sql(identify="safe") == '"x" AS "1"' assert parse_one("X as 1").sql(identify="safe") == 'X AS "1"' + + def test_generate_nested_binary(self): + sql = "SELECT 'foo'" + (" || 'foo'" * 500) + + now = time.time() + self.assertEqual(parse_one(sql).sql(), sql) + self.assertLessEqual(time.time() - now, 0.1) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 604a364..c746a78 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -360,6 +360,37 @@ class TestOptimizer(unittest.TestCase): "SELECT _q_0.id AS id, _q_0.dt AS dt, _q_0.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS _q_0", ) + # Detection of correlation where columns are referenced in derived tables nested within subqueries + self.assertEqual( + optimizer.qualify.qualify( + parse_one( + "SELECT a.g FROM a WHERE a.e < (SELECT MAX(u) FROM (SELECT SUM(c.b) AS u FROM c WHERE c.d = f GROUP BY c.e) w)" + ), + schema={ + "a": {"g": "INT", "e": "INT", "f": "INT"}, + "c": {"d": "INT", "e": "INT", "b": "INT"}, + }, + quote_identifiers=False, + ).sql(), + "SELECT a.g AS g FROM a AS a WHERE a.e < (SELECT MAX(w.u) AS _col_0 FROM (SELECT SUM(c.b) AS u FROM c AS c WHERE c.d = a.f GROUP BY c.e) AS w)", + ) + + # Detection of correlation where columns are referenced in derived tables nested within lateral joins + self.assertEqual( + optimizer.qualify.qualify( + parse_one( + "SELECT u.user_id, l.log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date FROM (SELECT l.log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l", + dialect="postgres", + ), + schema={ + "users": {"user_id": "text", "log_date": "date"}, + "logs": {"user_id": "text", "log_date": "date"}, + }, + quote_identifiers=False, + ).sql("postgres"), + "SELECT u.user_id AS user_id, l.log_date AS log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date AS log_date FROM (SELECT l.log_date AS log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l", + ) + self.check_file( "qualify_columns", qualify_columns, @@ -591,7 +622,7 @@ SELECT "_q_0"."n_comment" AS "n_comment" FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') AS "_q_0" """.strip(), - optimizer.optimize(expression).sql(pretty=True), + optimizer.optimize(expression, infer_csv_schemas=True).sql(pretty=True), ) def test_scope(self): @@ -1028,31 +1059,14 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') concat_expr.right.expressions[0].type.this, exp.DataType.Type.VARCHAR ) # x.cola (arg) + # Ensures we don't raise if there are unqualified columns annotate_types(parse_one("select x from y lateral view explode(y) as x")).expressions[0] - def test_null_annotation(self): - expression = annotate_types(parse_one("SELECT NULL + 2 AS col")).expressions[0].this - self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL) - self.assertEqual(expression.right.type.this, exp.DataType.Type.INT) - - # NULL <op> UNKNOWN should yield NULL - sql = "SELECT NULL + SOME_ANONYMOUS_FUNC() AS result" - - concat_expr_alias = annotate_types(parse_one(sql)).expressions[0] - self.assertEqual(concat_expr_alias.type.this, exp.DataType.Type.NULL) - - concat_expr = concat_expr_alias.this - self.assertEqual(concat_expr.type.this, exp.DataType.Type.NULL) - self.assertEqual(concat_expr.left.type.this, exp.DataType.Type.NULL) - self.assertEqual(concat_expr.right.type.this, exp.DataType.Type.UNKNOWN) - - def test_nullable_annotation(self): - nullable = exp.DataType.build("NULLABLE", expressions=exp.DataType.build("BOOLEAN")) - expression = annotate_types(parse_one("NULL AND FALSE")) - - self.assertEqual(expression.type, nullable) - self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL) - self.assertEqual(expression.right.type.this, exp.DataType.Type.BOOLEAN) + # NULL <op> UNKNOWN should yield UNKNOWN + self.assertEqual( + annotate_types(parse_one("SELECT NULL + ANONYMOUS_FUNC()")).expressions[0].type.this, + exp.DataType.Type.UNKNOWN, + ) def test_predicate_annotation(self): expression = annotate_types(parse_one("x BETWEEN a AND b")) @@ -1181,6 +1195,19 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') exp.DataType.build("date"), ) + self.assertEqual( + annotate_types( + optimizer.qualify.qualify( + parse_one( + "SELECT x FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-06 02:00:00', interval 1 day)) AS x" + ) + ) + ) + .selects[0] + .type, + exp.DataType.build("timestamp"), + ) + def test_map_annotation(self): # ToMap annotation expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb")) @@ -1196,6 +1223,26 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark")) self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)")) + def test_union_annotation(self): + for left, right, expected_type in ( + ("SELECT 1::INT AS c", "SELECT 2::BIGINT AS c", "BIGINT"), + ("SELECT 1 AS c", "SELECT NULL AS c", "INT"), + ("SELECT FOO() AS c", "SELECT 1 AS c", "UNKNOWN"), + ("SELECT FOO() AS c", "SELECT BAR() AS c", "UNKNOWN"), + ): + with self.subTest(f"left: {left}, right: {right}, expected: {expected_type}"): + lr = annotate_types(parse_one(f"SELECT t.c FROM ({left} UNION ALL {right}) t(c)")) + rl = annotate_types(parse_one(f"SELECT t.c FROM ({right} UNION ALL {left}) t(c)")) + assert lr.selects[0].type == rl.selects[0].type == exp.DataType.build(expected_type) + + union_by_name = annotate_types( + parse_one( + "SELECT t.a, t.d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7.0 d, 8::BIGINT a) AS t(a, d)" + ) + ) + self.assertEqual(union_by_name.selects[0].type.this, exp.DataType.Type.BIGINT) + self.assertEqual(union_by_name.selects[1].type.this, exp.DataType.Type.DOUBLE) + def test_recursive_cte(self): query = parse_one( """ diff --git a/tests/test_parser.py b/tests/test_parser.py index f360b43..ff82e08 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -579,12 +579,6 @@ class TestParser(unittest.TestCase): logger, ) - def test_rename_table(self): - self.assertEqual( - parse_one("ALTER TABLE foo RENAME TO bar").sql(), - "ALTER TABLE foo RENAME TO bar", - ) - def test_pivot_columns(self): nothing_aliased = """ SELECT * FROM ( @@ -705,77 +699,19 @@ class TestParser(unittest.TestCase): def test_parse_nested(self): now = time.time() - query = parse_one( - """ - SELECT * - FROM a - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - LEFT JOIN b ON a.id = b.id - """ - ) - + query = parse_one("SELECT * FROM a " + ("LEFT JOIN b ON a.id = b.id " * 38)) self.assertIsNotNone(query) + self.assertLessEqual(time.time() - now, 0.1) - query = parse_one( - """ - SELECT * - FROM a - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - LEFT JOIN UNNEST(ARRAY[]) - """ - ) + now = time.time() + query = parse_one("SELECT * FROM a " + ("LEFT JOIN UNNEST(ARRAY[]) " * 15)) + self.assertIsNotNone(query) + self.assertLessEqual(time.time() - now, 0.1) + now = time.time() + query = parse_one("SELECT * FROM a " + ("OUTER APPLY (SELECT * FROM b) " * 30)) self.assertIsNotNone(query) - self.assertLessEqual(time.time() - now, 0.2) + self.assertLessEqual(time.time() - now, 0.1) def test_parse_properties(self): self.assertEqual( @@ -903,3 +839,18 @@ class TestParser(unittest.TestCase): def test_parse_prop_eq(self): self.assertIsInstance(parse_one("x(a := b and c)").expressions[0], exp.PropertyEQ) + + def test_collate(self): + collates = [ + ('pg_catalog."default"', exp.Column), + ('"en_DE"', exp.Identifier), + ("LATIN1_GENERAL_BIN", exp.Var), + ("'en'", exp.Literal), + ] + + for collate_pair in collates: + collate_node = parse_one( + f"""SELECT * FROM t WHERE foo LIKE '%bar%' COLLATE {collate_pair[0]}""" + ).find(exp.Collate) + self.assertIsInstance(collate_node, exp.Collate) + self.assertIsInstance(collate_node.expression, collate_pair[1]) diff --git a/tests/test_schema.py b/tests/test_schema.py index 21b59fd..83cbbd8 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -202,11 +202,11 @@ class TestSchema(unittest.TestCase): dialect="clickhouse", ) - table_z = exp.table_("z", db="y", catalog="x") + table_z = exp.table_("Z", db="y", catalog="x") table_w = exp.table_("w", db="y", catalog="x") self.assertEqual(schema.column_names(table_z), ["a", "B"]) - self.assertEqual(schema.column_names(table_w), ["c"]) + self.assertEqual(schema.column_names(table_w), ["C"]) schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse") self.assertEqual(schema.column_names(exp.table_("x")), ["y"]) diff --git a/tests/test_transpile.py b/tests/test_transpile.py index d27739c..1e0d9b6 100644 --- a/tests/test_transpile.py +++ b/tests/test_transpile.py @@ -733,6 +733,11 @@ FROM x""", self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="hive") self.validate("TIME_STR_TO_TIME(x)", "TIME_STR_TO_TIME(x)", write=None) + self.validate( + "TIME_STR_TO_TIME(x, 'America/Los_Angeles')", + "TIME_STR_TO_TIME(x, 'America/Los_Angeles')", + write=None, + ) self.validate("TIME_STR_TO_UNIX(x)", "TIME_STR_TO_UNIX(x)", write=None) self.validate("TIME_TO_TIME_STR(x)", "CAST(x AS TEXT)", write=None) self.validate("TIME_TO_STR(x, 'y')", "TIME_TO_STR(x, 'y')", write=None) @@ -845,7 +850,6 @@ FROM x""", "ALTER TABLE table1 RENAME COLUMN c1 TO c2, c2 TO c3", "ALTER TABLE table1 RENAME COLUMN c1 c2", "ALTER TYPE electronic_mail RENAME TO email", - "ALTER VIEW foo ALTER COLUMN bla SET DEFAULT 'NOT SET'", "ALTER schema doo", "ANALYZE a.y", "CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')", |