Diffstat:
-rw-r--r-- | tests/dialects/test_clickhouse.py | 503
1 file changed, 434 insertions(+), 69 deletions(-)
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index 72634a8..5a4461e 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -1,4 +1,8 @@
+from datetime import date, datetime, timezone
 from sqlglot import exp, parse_one
+from sqlglot.dialects import ClickHouse
+from sqlglot.expressions import convert
+from sqlglot.optimizer import traverse_scope
 from tests.dialects.test_dialect import Validator
 from sqlglot.errors import ErrorLevel
@@ -7,41 +11,34 @@ class TestClickhouse(Validator):
     dialect = "clickhouse"
 
     def test_clickhouse(self):
-        self.validate_all(
-            "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
-            write={
-                "": "SELECT * FROM x WHERE z = 2",
-                "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
-            },
-        )
-        self.validate_all(
-            "SELECT * FROM x AS prewhere",
-            read={
-                "clickhouse": "SELECT * FROM x AS prewhere",
-                "duckdb": "SELECT * FROM x prewhere",
-            },
-        )
+        for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING:
+            self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)")
 
-        self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
-
-        string_types = [
-            "BLOB",
-            "LONGBLOB",
-            "LONGTEXT",
-            "MEDIUMBLOB",
-            "MEDIUMTEXT",
-            "TINYBLOB",
-            "TINYTEXT",
-            "VARCHAR(255)",
-        ]
+        # Arrays, maps and tuples can't be Nullable in ClickHouse
+        for non_nullable_type in ("ARRAY<INT>", "MAP<INT, INT>", "STRUCT(a: INT)"):
+            try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})")
+            target_type = try_cast.to.sql("clickhouse")
+            self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})")
 
-        for string_type in string_types:
-            self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)")
+        for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"):
+            try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})")
+            target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse")
+            self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))")
 
         expr = parse_one("count(x)")
         self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
         self.assertIsNone(expr._meta)
 
+        self.validate_identity("CAST(1 AS Bool)")
+        self.validate_identity("SELECT toString(CHAR(104.1, 101, 108.9, 108.9, 111, 32))")
+        self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var)
+        self.validate_identity("SELECT toFloat(like)")
+        self.validate_identity("SELECT like")
+        self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)")
+        self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')")
+        self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))")
+        self.validate_identity("extract(haystack, pattern)")
+        self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
         self.validate_identity("SELECT CAST(x AS Tuple(String, Array(Nullable(Float64))))")
         self.validate_identity("countIf(x, y)")
         self.validate_identity("x = y")
@@ -49,7 +46,7 @@ class TestClickhouse(Validator):
         self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)")
         self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)")
         self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000")
-        self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))")
+        self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))")
         self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))")
self.validate_identity("CAST(x AS Enum('hello', 'world'))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))") @@ -85,26 +82,29 @@ class TestClickhouse(Validator): self.validate_identity("SELECT histogram(5)(a)") self.validate_identity("SELECT groupUniqArray(2)(a)") self.validate_identity("SELECT exponentialTimeDecayedAvg(60)(a, b)") + self.validate_identity("levenshteinDistance(col1, col2)", "editDistance(col1, col2)") self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)") self.validate_identity("position(haystack, needle)") self.validate_identity("position(haystack, needle, position)") - self.validate_identity("CAST(x AS DATETIME)") + self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)") + self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)") self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)") + self.validate_identity("CAST(x AS DECIMAL(38, 2))", "CAST(x AS Decimal(38, 2))") self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src") self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""") self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b") self.validate_identity("SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b") + self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster") + self.validate_identity("TRUNCATE DATABASE db") + self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster") self.validate_identity( - "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table" + "SELECT CAST(1730098800 AS DateTime64) AS DATETIME, 'test' AS interp ORDER BY DATETIME WITH FILL FROM toDateTime64(1730098800, 3) - INTERVAL '7' HOUR TO toDateTime64(1730185140, 3) - INTERVAL '7' HOUR STEP toIntervalSecond(900) INTERPOLATE (interp)" ) - self.validate_identity( - "SELECT $1$foo$1$", - "SELECT 'foo'", + "SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) WINDOW window_name AS (PARTITION BY number) QUALIFY partition_count = 4 ORDER BY number" ) self.validate_identity( - "SELECT * FROM table LIMIT 1, 2 BY a, b", - "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b", + "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table" ) self.validate_identity( "SELECT * FROM table LIMIT 1 BY CONCAT(datalayerVariantNo, datalayerProductId, warehouse)" @@ -134,10 +134,6 @@ class TestClickhouse(Validator): "SELECT sum(1) AS impressions, (arrayJoin(arrayZip(cities, browsers)) AS t).1 AS city, t.2 AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3" ) self.validate_identity( - "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']", - "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')", - ) - self.validate_identity( 'SELECT CAST(tuple(1 AS "a", 2 AS "b", 3.0 AS "c").2 AS Nullable(String))' ) self.validate_identity( @@ -150,17 +146,84 @@ class TestClickhouse(Validator): "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple() AS SELECT * FROM test_data" ) self.validate_identity( - "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) TO table1 AS SELECT * FROM 
test_data" + "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 TO table1 AS SELECT * FROM test_data" ) self.validate_identity( - "CREATE MATERIALIZED VIEW test_view (id UInt8) TO db.table1 AS SELECT * FROM test_data" + "CREATE MATERIALIZED VIEW test_view TO db.table1 (id UInt8) AS SELECT * FROM test_data" ) - self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster") - self.validate_identity("TRUNCATE DATABASE db") - self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster") self.validate_identity( "CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)" ) + self.validate_identity( + "SELECT (toUInt8('1') + toUInt8('2')) IS NOT NULL", + "SELECT NOT ((toUInt8('1') + toUInt8('2')) IS NULL)", + ) + self.validate_identity( + "SELECT $1$foo$1$", + "SELECT 'foo'", + ) + self.validate_identity( + "SELECT * FROM table LIMIT 1, 2 BY a, b", + "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b", + ) + self.validate_identity( + "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']", + "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')", + ) + + self.validate_all( + "CHAR(67) || CHAR(65) || CHAR(84)", + read={ + "clickhouse": "CHAR(67) || CHAR(65) || CHAR(84)", + "oracle": "CHR(67) || CHR(65) || CHR(84)", + }, + ) + self.validate_all( + "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + read={ + "clickhouse": "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + "oracle": "SELECT LAG(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + }, + ) + self.validate_all( + "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + read={ + "clickhouse": "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + "oracle": "SELECT LEAD(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", + }, + ) + self.validate_all( + "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)", + read={ + "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)", + "postgres": "SELECT TO_DATE('05 12 2000', 'DD MM YYYY')", + }, + write={ + "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)", + "postgres": "SELECT CAST(CAST(TO_DATE('05 12 2000', 'DD MM YYYY') AS TIMESTAMP) AS DATE)", + }, + ) + self.validate_all( + "SELECT * FROM x PREWHERE y = 1 WHERE z = 2", + write={ + "": "SELECT * FROM x WHERE z = 2", + "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2", + }, + ) + self.validate_all( + "SELECT * FROM x AS prewhere", + read={ + "clickhouse": "SELECT * FROM x AS prewhere", + "duckdb": "SELECT * FROM x prewhere", + }, + ) + self.validate_all( + "SELECT a, b FROM (SELECT * FROM x) AS t", + read={ + "clickhouse": "SELECT a, b FROM (SELECT * FROM x) AS t", + "duckdb": "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)", + }, + ) self.validate_all( "SELECT arrayJoin([1,2,3])", write={ @@ -181,11 +244,16 @@ class TestClickhouse(Validator): }, ) self.validate_all( - "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND", + "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", read={ "duckdb": "SELECT TIMESTAMP 
                 "duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
                 "postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
             },
+            write={
+                "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
+                "duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND",
+                "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'",
+            },
         )
         self.validate_all(
             "SELECT CURRENT_DATE()",
@@ -363,15 +431,15 @@ class TestClickhouse(Validator):
                 " GROUP BY loyalty ORDER BY loyalty ASC"
             },
         )
-        self.validate_identity("SELECT s, arr FROM arrays_test ARRAY JOIN arr")
-        self.validate_identity("SELECT s, arr, a FROM arrays_test LEFT ARRAY JOIN arr AS a")
-        self.validate_identity(
-            "SELECT s, arr_external FROM arrays_test ARRAY JOIN [1, 2, 3] AS arr_external"
-        )
         self.validate_all(
             "SELECT quantile(0.5)(a)",
-            read={"duckdb": "SELECT quantile(a, 0.5)"},
-            write={"clickhouse": "SELECT quantile(0.5)(a)"},
+            read={
+                "duckdb": "SELECT quantile(a, 0.5)",
+                "clickhouse": "SELECT median(a)",
+            },
+            write={
+                "clickhouse": "SELECT quantile(0.5)(a)",
+            },
         )
         self.validate_all(
             "SELECT quantiles(0.5, 0.4)(a)",
@@ -413,15 +481,15 @@ class TestClickhouse(Validator):
 
         self.validate_identity("SELECT FORMAT")
         self.validate_identity("1 AS FORMAT").assert_is(exp.Alias)
-        self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d', '%T')")
+        self.validate_identity("SELECT formatDateTime(NOW(), '%Y-%m-%d', '%T')")
         self.validate_all(
-            "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+            "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
             read={
                 "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
                 "mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
             },
             write={
-                "clickhouse": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+                "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
                 "mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
             },
         )
@@ -440,6 +508,70 @@ class TestClickhouse(Validator):
         )
         self.validate_identity("ALTER TABLE visits REPLACE PARTITION ID '201901' FROM visits_tmp")
         self.validate_identity("ALTER TABLE visits ON CLUSTER test_cluster DROP COLUMN col1")
+        self.validate_identity("DELETE FROM tbl ON CLUSTER test_cluster WHERE date = '2019-01-01'")
+
+        self.assertIsInstance(
+            parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType
+        )
+
+        self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
+        self.validate_all(
+            "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+            read={
+                # looks like values table function, but should be parsed as VALUES block
+                "clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)"
+            },
+            write={
+                "clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+                "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+            },
+        )
+        self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')")
)')") + self.validate_identity("current_timestamp").assert_is(exp.Column) + + self.validate_identity("SELECT * APPLY(sum) FROM columns_transformers") + self.validate_identity("SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers") + self.validate_identity( + "SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers" + ) + self.validate_identity("SELECT * APPLY(sum), COLUMNS('col') APPLY(sum) APPLY(avg) FROM t") + self.validate_identity( + "SELECT * FROM ABC WHERE hasAny(COLUMNS('.*field') APPLY(toUInt64) APPLY(to), (SELECT groupUniqArray(toUInt64(field))))" + ) + self.validate_identity("SELECT col apply", "SELECT col AS apply") + self.validate_identity( + "SELECT name FROM data WHERE (SELECT DISTINCT name FROM data) IS NOT NULL", + "SELECT name FROM data WHERE NOT ((SELECT DISTINCT name FROM data) IS NULL)", + ) + + def test_clickhouse_values(self): + values = exp.select("*").from_( + exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"]) + ) + self.assertEqual( + values.sql("clickhouse"), + "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq", + ) + + self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)") + self.validate_identity( + "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)", + "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", + ) + + self.validate_all( + "SELECT col FROM (SELECT 1 AS col) AS _t", + read={ + "duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)", + }, + ) + self.validate_all( + "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t", + read={ + "duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)", + }, + ) def test_cte(self): self.validate_identity("WITH 'x' AS foo SELECT foo") @@ -501,7 +633,7 @@ class TestClickhouse(Validator): self.validate_all( "SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", write={ - "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", + "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}", "": "SELECT :abc, :b, :c, :d, :e", }, ) @@ -531,15 +663,99 @@ class TestClickhouse(Validator): write={"clickhouse": f"CAST(pow(2, 32) AS {data_type})"}, ) + def test_geom_types(self): + data_types = ["Point", "Ring", "LineString", "MultiLineString", "Polygon", "MultiPolygon"] + for data_type in data_types: + with self.subTest(f"Casting to ClickHouse {data_type}"): + self.validate_identity(f"SELECT CAST(val AS {data_type})") + def test_ddl(self): + db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None) + create_with_cluster = exp.Create( + this=db_table_expr, + kind="DATABASE", + properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), + ) + self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + + # Transpiled CREATE SCHEMA may have OnCluster property set + create_with_cluster = exp.Create( + this=db_table_expr, + kind="SCHEMA", + properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), + ) + self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + + ctas_with_comment = exp.Create( + this=exp.table_("foo"), + kind="TABLE", + expression=exp.select("*").from_("db.other_table"), + properties=exp.Properties( + expressions=[ + 
+                    exp.EngineProperty(this=exp.var("Memory")),
+                    exp.SchemaCommentProperty(this=exp.Literal.string("foo")),
+                ],
+            ),
+        )
+        self.assertEqual(
+            ctas_with_comment.sql("clickhouse"),
+            "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'",
+        )
+
+        self.validate_identity("CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b")
+        self.validate_identity("CREATE MATERIALIZED VIEW a.b TO a.c (c Int32) AS SELECT * FROM a.d")
+        self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
+        self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
+        self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))")
+        self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""")
+        self.validate_identity(
+            "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
+        )
+        self.validate_identity(
+            "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
+        )
+        self.validate_identity(
+            """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'"""
+        )
+        self.validate_identity(
+            "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3"
+        )
         self.validate_identity(
             'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)'
         )
         self.validate_identity(
             "CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'"
         )
+        self.validate_identity(
+            "CREATE FUNCTION parity_str AS (n) -> IF(n % 2, 'odd', 'even')",
+            "CREATE FUNCTION parity_str AS n -> CASE WHEN n % 2 THEN 'odd' ELSE 'even' END",
+        )
+        self.validate_identity(
+            "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
+            "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
+        )
 
         self.validate_all(
+            "CREATE DATABASE x",
+            read={
+                "duckdb": "CREATE SCHEMA x",
+            },
+            write={
+                "clickhouse": "CREATE DATABASE x",
+                "duckdb": "CREATE SCHEMA x",
+            },
+        )
+        self.validate_all(
+            "DROP DATABASE x",
+            read={
+                "duckdb": "DROP SCHEMA x",
+            },
+            write={
+                "clickhouse": "DROP DATABASE x",
+                "duckdb": "DROP SCHEMA x",
+            },
+        )
+        self.validate_all(
             """
             CREATE TABLE example1 (
                 timestamp DateTime,
                 x UInt32 TTL now() + INTERVAL '1' MONTH,
                 y String TTL timestamp + INTERVAL '1' DAY,
                 z String
             """,
             write={
                 "clickhouse": """CREATE TABLE example1 (
-    timestamp DATETIME,
+    timestamp DateTime,
     x UInt32 TTL now() + INTERVAL '1' MONTH,
     y String TTL timestamp + INTERVAL '1' DAY,
     z String
@@ -630,7 +846,7 @@ SETTINGS
             """,
             write={
                 "clickhouse": """CREATE TABLE example_table (
-    d DATETIME,
+    d DateTime,
     a Int32
 )
 ENGINE=MergeTree
@@ -657,7 +873,7 @@ TTL
             """,
             write={
                 "clickhouse": """CREATE TABLE table_with_where (
-    d DATETIME,
+    d DateTime,
     a Int32
 )
 ENGINE=MergeTree
@@ -685,7 +901,7 @@ WHERE
             """,
             write={
                 "clickhouse": """CREATE TABLE table_for_recompression (
-    d DATETIME,
+    d DateTime,
     key UInt64,
     value String
 )
@@ -717,7 +933,7 @@ SETTINGS
             """,
             write={
                 "clickhouse": """CREATE TABLE table_for_aggregation (
-    d DATETIME,
+    d DateTime,
     k1 Int32,
     k2 Int32,
     x Int32,
@@ -824,8 +1040,6 @@ LIFETIME(MIN 0 MAX 0)""",
             },
             pretty=True,
         )
-        self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") self.validate_all( """ CREATE TABLE t ( @@ -842,11 +1056,11 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) - self.validate_identity( - "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" - ) - self.validate_identity( - "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" + + self.assertIsNotNone( + self.validate_identity("CREATE TABLE t1 (a String MATERIALIZED func())").find( + exp.ColumnConstraint + ) ) def test_agg_functions(self): @@ -880,3 +1094,154 @@ LIFETIME(MIN 0 MAX 0)""", for creatable in ("DATABASE", "TABLE", "VIEW", "DICTIONARY", "FUNCTION"): with self.subTest(f"Test DROP {creatable} ON CLUSTER"): self.validate_identity(f"DROP {creatable} test ON CLUSTER test_cluster") + + def test_datetime_funcs(self): + # Each datetime func has an alias that is roundtripped to the original name e.g. (DATE_SUB, DATESUB) -> DATE_SUB + datetime_funcs = (("DATE_SUB", "DATESUB"), ("DATE_ADD", "DATEADD")) + + # 2-arg functions of type <func>(date, unit) + for func in (*datetime_funcs, ("TIMESTAMP_ADD", "TIMESTAMPADD")): + func_name = func[0] + for func_alias in func: + self.validate_identity( + f"""SELECT {func_alias}(date, INTERVAL '3' YEAR)""", + f"""SELECT {func_name}(date, INTERVAL '3' YEAR)""", + ) + + # 3-arg functions of type <func>(unit, value, date) + for func in (*datetime_funcs, ("DATE_DIFF", "DATEDIFF"), ("TIMESTAMP_SUB", "TIMESTAMPSUB")): + func_name = func[0] + for func_alias in func: + with self.subTest(f"Test 3-arg date-time function {func_alias}"): + self.validate_identity( + f"SELECT {func_alias}(SECOND, 1, bar)", + f"SELECT {func_name}(SECOND, 1, bar)", + ) + + def test_convert(self): + self.assertEqual( + convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')" + ) + + # no fractional seconds + self.assertEqual( + convert(datetime(2020, 1, 1, 0, 0, 1)).sql(dialect=self.dialect), + "CAST('2020-01-01 00:00:01' AS DateTime64(6))", + ) + self.assertEqual( + convert(datetime(2020, 1, 1, 0, 0, 1, tzinfo=timezone.utc)).sql(dialect=self.dialect), + "CAST('2020-01-01 00:00:01' AS DateTime64(6, 'UTC'))", + ) + + # with fractional seconds + self.assertEqual( + convert(datetime(2020, 1, 1, 0, 0, 1, 1)).sql(dialect=self.dialect), + "CAST('2020-01-01 00:00:01.000001' AS DateTime64(6))", + ) + self.assertEqual( + convert(datetime(2020, 1, 1, 0, 0, 1, 1, tzinfo=timezone.utc)).sql( + dialect=self.dialect + ), + "CAST('2020-01-01 00:00:01.000001' AS DateTime64(6, 'UTC'))", + ) + + def test_timestr_to_time(self): + # no fractional seconds + time_strings = [ + "2020-01-01 00:00:01", + "2020-01-01 00:00:01+01:00", + " 2020-01-01 00:00:01-01:00 ", + "2020-01-01T00:00:01+01:00", + ] + for time_string in time_strings: + with self.subTest(f"'{time_string}'"): + self.assertEqual( + exp.TimeStrToTime(this=exp.Literal.string(time_string)).sql( + dialect=self.dialect + ), + f"CAST('{time_string}' AS DateTime64(6))", + ) + + time_strings_no_utc = ["2020-01-01 00:00:01" for i in range(4)] + for utc, no_utc in zip(time_strings, time_strings_no_utc): + with self.subTest(f"'{time_string}' with UTC timezone"): + self.assertEqual( + exp.TimeStrToTime( + this=exp.Literal.string(utc), zone=exp.Literal.string("UTC") + ).sql(dialect=self.dialect), + f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))", + ) + + # with fractional seconds + 
+        time_strings = [
+            "2020-01-01 00:00:01.001",
+            "2020-01-01 00:00:01.000001",
+            "2020-01-01 00:00:01.001+00:00",
+            "2020-01-01 00:00:01.000001-00:00",
+            "2020-01-01 00:00:01.0001",
+            "2020-01-01 00:00:01.1+00:00",
+        ]
+
+        for time_string in time_strings:
+            with self.subTest(f"'{time_string}'"):
+                self.assertEqual(
+                    exp.TimeStrToTime(this=exp.Literal.string(time_string)).sql(
+                        dialect=self.dialect
+                    ),
+                    f"CAST('{time_string}' AS DateTime64(6))",
+                )
+
+        time_strings_no_utc = [
+            "2020-01-01 00:00:01.001000",
+            "2020-01-01 00:00:01.000001",
+            "2020-01-01 00:00:01.001000",
+            "2020-01-01 00:00:01.000001",
+            "2020-01-01 00:00:01.000100",
+            "2020-01-01 00:00:01.100000",
+        ]
+
+        for utc, no_utc in zip(time_strings, time_strings_no_utc):
+            with self.subTest(f"'{time_string}' with UTC timezone"):
+                self.assertEqual(
+                    exp.TimeStrToTime(
+                        this=exp.Literal.string(utc), zone=exp.Literal.string("UTC")
+                    ).sql(dialect=self.dialect),
+                    f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))",
+                )
+
+    def test_grant(self):
+        self.validate_identity("GRANT SELECT(x, y) ON db.table TO john WITH GRANT OPTION")
+        self.validate_identity("GRANT INSERT(x, y) ON db.table TO john")
+
+    def test_array_join(self):
+        expr = self.validate_identity(
+            "SELECT * FROM arrays_test ARRAY JOIN arr1, arrays_test.arr2 AS foo, ['a', 'b', 'c'] AS elem"
+        )
+        joins = expr.args["joins"]
+        self.assertEqual(len(joins), 1)
+
+        join = joins[0]
+        self.assertEqual(join.kind, "ARRAY")
+        self.assertIsInstance(join.this, exp.Column)
+
+        self.assertEqual(len(join.expressions), 2)
+        self.assertIsInstance(join.expressions[0], exp.Alias)
+        self.assertIsInstance(join.expressions[0].this, exp.Column)
+
+        self.assertIsInstance(join.expressions[1], exp.Alias)
+        self.assertIsInstance(join.expressions[1].this, exp.Array)
+
+        self.validate_identity("SELECT s, arr FROM arrays_test ARRAY JOIN arr")
+        self.validate_identity("SELECT s, arr, a FROM arrays_test LEFT ARRAY JOIN arr AS a")
+        self.validate_identity(
+            "SELECT s, arr_external FROM arrays_test ARRAY JOIN [1, 2, 3] AS arr_external"
+        )
+        self.validate_identity(
+            "SELECT * FROM arrays_test ARRAY JOIN [1, 2, 3] AS arr_external1, ['a', 'b', 'c'] AS arr_external2, splitByString(',', 'asd,qwerty,zxc') AS arr_external3"
+        )
+
+    def test_traverse_scope(self):
+        sql = "SELECT * FROM t FINAL"
+        scopes = traverse_scope(parse_one(sql, dialect=self.dialect))
+        self.assertEqual(len(scopes), 1)
+        self.assertEqual(set(scopes[0].sources), {"t"})
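
The new TRY_CAST tests near the top of the diff encode ClickHouse's rule that a "safe" cast targets a Nullable type, except for arrays, maps and tuples, which cannot be Nullable. A minimal standalone sketch of the same behavior through sqlglot's public API, assuming a sqlglot build that includes this change (the expected outputs mirror the assertions in the diff):

    from sqlglot import parse_one

    # Scalar target type: TRY_CAST is rendered as a cast to a Nullable type.
    print(parse_one("TRY_CAST(x AS TEXT)").sql("clickhouse"))
    # CAST(x AS Nullable(String))

    # Composite target types can't be Nullable, so no wrapper is emitted.
    print(parse_one("TRY_CAST(x AS ARRAY<INT>)").sql("clickhouse"))
    # CAST(x AS Array(Int32))

The updated suite can be run from the repository root with: python -m unittest tests.dialects.test_clickhouse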