From 948a422be120c069e48c63a8770fec7204307897 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 19 Dec 2023 12:01:36 +0100 Subject: Adding upstream version 20.3.0. Signed-off-by: Daniel Baumann --- tests/dialects/test_clickhouse.py | 14 +++++ tests/dialects/test_duckdb.py | 9 +++ tests/dialects/test_postgres.py | 7 +++ tests/dialects/test_presto.py | 122 +++++++++++++++++++++++++++++++------- tests/dialects/test_snowflake.py | 103 ++++++++++++++++++++++++++++++-- tests/dialects/test_spark.py | 2 +- tests/dialects/test_tsql.py | 6 ++ 7 files changed, 234 insertions(+), 29 deletions(-) (limited to 'tests/dialects') diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index 86ddb00..1f528b6 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -6,6 +6,8 @@ class TestClickhouse(Validator): dialect = "clickhouse" def test_clickhouse(self): + self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y") + string_types = [ "BLOB", "LONGBLOB", @@ -68,6 +70,18 @@ class TestClickhouse(Validator): self.validate_identity("CAST(x AS DATETIME)") self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)") self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src") + self.validate_identity( + "SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL" + ) + self.validate_identity( + "SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5" + ) + self.validate_identity( + "SELECT toDate((number * 10) * 86400) AS d1, toDate(number * 86400) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ORDER BY d2 WITH FILL, d1 WITH FILL STEP 5" + ) + self.validate_identity( + "SELECT n, source, inter FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1)" + ) self.validate_identity( "SELECT SUM(1) AS impressions, arrayJoin(cities) AS city, arrayJoin(browsers) AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3" ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 687a807..f915168 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -54,6 +54,13 @@ class TestDuckDB(Validator): }, ) + self.validate_all( + "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", + read={ + "bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table", + "duckdb": "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", + }, + ) self.validate_all( "WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER (WHERE x > 1) FROM cte", write={ @@ -109,6 +116,8 @@ class TestDuckDB(Validator): parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" ) + self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)") + self.validate_identity("MAKE_TIMESTAMP(1667810584123456)") self.validate_identity("SELECT EPOCH_MS(10) AS t") self.validate_identity("SELECT MAKE_TIMESTAMP(10) AS t") self.validate_identity("SELECT TO_TIMESTAMP(10) AS t") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 17a65d7..9155696 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -8,6 +8,11 @@ class TestPostgres(Validator): dialect = "postgres" def test_ddl(self): + expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres") + cdef = expr.find(exp.ColumnDef) + cdef.args["kind"].assert_is(exp.DataType) + self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL day)") + self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)") self.validate_identity("CREATE TABLE test (elems JSONB[])") self.validate_identity("CREATE TABLE public.y (x TSTZRANGE NOT NULL)") @@ -203,6 +208,8 @@ class TestPostgres(Validator): self.validate_identity("SELECT 1 OPERATOR(pg_catalog.+) 2") def test_postgres(self): + self.validate_identity("EXEC AS myfunc @id = 123") + expr = parse_one( "SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)", read="postgres" ) diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 6a82756..97a387c 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -379,6 +379,16 @@ class TestPresto(Validator): "presto": "TIMESTAMP(x, '12:00:00')", }, ) + self.validate_all( + "DATE_ADD('DAY', CAST(x AS BIGINT), y)", + write={ + "presto": "DATE_ADD('DAY', CAST(x AS BIGINT), y)", + }, + read={ + "presto": "DATE_ADD('DAY', x, y)", + }, + ) + self.validate_identity("DATE_ADD('DAY', 1, y)") def test_ddl(self): self.validate_all( @@ -462,10 +472,10 @@ class TestPresto(Validator): ) self.validate_all( - 'CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" TEXT) WITH (PARTITIONED BY=("ds"))', + """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" TEXT) COMMENT 'comment' WITH (PARTITIONED BY=("ds"))""", write={ - "spark": "CREATE TABLE IF NOT EXISTS x (`cola` INT, `ds` STRING) PARTITIONED BY (`ds`)", - "presto": """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" VARCHAR) WITH (PARTITIONED_BY=ARRAY['ds'])""", + "spark": "CREATE TABLE IF NOT EXISTS x (`cola` INT, `ds` STRING) COMMENT 'comment' PARTITIONED BY (`ds`)", + "presto": """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" VARCHAR) COMMENT 'comment' WITH (PARTITIONED_BY=ARRAY['ds'])""", }, ) @@ -534,26 +544,18 @@ class TestPresto(Validator): }, ) - def test_presto(self): - self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')") - self.validate_identity( - "SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955" - ) - self.validate_identity( - "SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)" - ) - - self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1") - self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY") - self.validate_identity("SELECT BOOL_OR(a > 10) FROM asd AS T(a)") - self.validate_identity("SELECT * FROM (VALUES (1))") - self.validate_identity("START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE") - self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ") - self.validate_identity("APPROX_PERCENTILE(a, b, c, d)") - self.validate_identity( - "SELECT SPLIT_TO_MAP('a:1;b:2;a:3', ';', ':', (k, v1, v2) -> CONCAT(v1, v2))" - ) + def test_unicode_string(self): + for prefix in ("u&", "U&"): + self.validate_identity( + f"{prefix}'Hello winter \\2603 !'", + "U&'Hello winter \\2603 !'", + ) + self.validate_identity( + f"{prefix}'Hello winter #2603 !' UESCAPE '#'", + "U&'Hello winter #2603 !' UESCAPE '#'", + ) + def test_presto(self): with self.assertLogs(helper_logger) as cm: self.validate_all( "SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table", @@ -572,6 +574,24 @@ class TestPresto(Validator): }, ) + self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')") + self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1") + self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY") + self.validate_identity("SELECT BOOL_OR(a > 10) FROM asd AS T(a)") + self.validate_identity("SELECT * FROM (VALUES (1))") + self.validate_identity("START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE") + self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ") + self.validate_identity("APPROX_PERCENTILE(a, b, c, d)") + self.validate_identity( + "SELECT SPLIT_TO_MAP('a:1;b:2;a:3', ';', ':', (k, v1, v2) -> CONCAT(v1, v2))" + ) + self.validate_identity( + "SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955" + ) + self.validate_identity( + "SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)" + ) + self.validate_all( "SELECT MAX_BY(a.id, a.timestamp) FROM a", read={ @@ -1044,3 +1064,61 @@ MATCH_RECOGNIZE ( )""", pretty=True, ) + + def test_to_char(self): + self.validate_all( + "TO_CHAR(ts, 'dd')", + write={ + "bigquery": "FORMAT_DATE('%d', ts)", + "presto": "DATE_FORMAT(ts, '%d')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'hh')", + write={ + "bigquery": "FORMAT_DATE('%H', ts)", + "presto": "DATE_FORMAT(ts, '%H')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'hh24')", + write={ + "bigquery": "FORMAT_DATE('%H', ts)", + "presto": "DATE_FORMAT(ts, '%H')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'mi')", + write={ + "bigquery": "FORMAT_DATE('%M', ts)", + "presto": "DATE_FORMAT(ts, '%i')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'mm')", + write={ + "bigquery": "FORMAT_DATE('%m', ts)", + "presto": "DATE_FORMAT(ts, '%m')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'ss')", + write={ + "bigquery": "FORMAT_DATE('%S', ts)", + "presto": "DATE_FORMAT(ts, '%s')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'yyyy')", + write={ + "bigquery": "FORMAT_DATE('%Y', ts)", + "presto": "DATE_FORMAT(ts, '%Y')", + }, + ) + self.validate_all( + "TO_CHAR(ts, 'yy')", + write={ + "bigquery": "FORMAT_DATE('%y', ts)", + "presto": "DATE_FORMAT(ts, '%y')", + }, + ) diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 997c27b..4d8168a 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -36,6 +36,8 @@ WHERE )""", ) + self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))") + self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))") self.validate_identity("SELECT user_id, value FROM table_name sample ($s) SEED (0)") self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)") self.validate_identity("SELECT OBJECT_CONSTRUCT()") @@ -72,6 +74,18 @@ WHERE self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) + self.validate_identity( + "SELECT * FROM foo at", + "SELECT * FROM foo AS at", + ) + self.validate_identity( + "SELECT * FROM foo before", + "SELECT * FROM foo AS before", + ) + self.validate_identity( + "SELECT * FROM foo at (col)", + "SELECT * FROM foo AS at(col)", + ) self.validate_identity( "SELECT * FROM unnest(x) with ordinality", "SELECT * FROM TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this)", @@ -115,11 +129,37 @@ WHERE "SELECT TO_TIMESTAMP(x) FROM t", "SELECT CAST(x AS TIMESTAMPNTZ) FROM t", ) + self.validate_identity( + "CAST(x AS BYTEINT)", + "CAST(x AS INT)", + ) + self.validate_identity( + "CAST(x AS CHAR VARYING)", + "CAST(x AS VARCHAR)", + ) + self.validate_identity( + "CAST(x AS CHARACTER VARYING)", + "CAST(x AS VARCHAR)", + ) + self.validate_identity( + "CAST(x AS NCHAR VARYING)", + "CAST(x AS VARCHAR)", + ) - self.validate_all("CAST(x AS BYTEINT)", write={"snowflake": "CAST(x AS INT)"}) - self.validate_all("CAST(x AS CHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"}) - self.validate_all("CAST(x AS CHARACTER VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"}) - self.validate_all("CAST(x AS NCHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"}) + self.validate_all( + "SELECT TO_ARRAY(['test'])", + write={ + "snowflake": "SELECT TO_ARRAY(['test'])", + "spark": "SELECT ARRAY('test')", + }, + ) + self.validate_all( + "SELECT TO_ARRAY(['test'])", + write={ + "snowflake": "SELECT TO_ARRAY(['test'])", + "spark": "SELECT ARRAY('test')", + }, + ) self.validate_all( # We need to qualify the columns in this query because "value" would be ambiguous 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t, TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', @@ -489,8 +529,8 @@ WHERE self.validate_all( "TO_ARRAY(x)", write={ - "spark": "ARRAY(x)", - "snowflake": "[x]", + "spark": "IF(x IS NULL, NULL, ARRAY(x))", + "snowflake": "TO_ARRAY(x)", }, ) self.validate_all( @@ -626,6 +666,10 @@ WHERE "SELECT * FROM @mystage t (c1)", "SELECT * FROM @mystage AS t(c1)", ) + self.validate_identity( + "SELECT * FROM @foo/bar (PATTERN => 'test', FILE_FORMAT => ds_sandbox.test.my_csv_format) AS bla", + "SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla", + ) def test_sample(self): self.validate_identity("SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)") @@ -775,6 +819,53 @@ WHERE }, ) + def test_historical_data(self): + self.validate_identity("SELECT * FROM my_table AT (STATEMENT => $query_id_var)") + self.validate_identity("SELECT * FROM my_table AT (OFFSET => -60 * 5)") + self.validate_identity("SELECT * FROM my_table BEFORE (STATEMENT => $query_id_var)") + self.validate_identity("SELECT * FROM my_table BEFORE (OFFSET => -60 * 5)") + self.validate_identity("CREATE SCHEMA restored_schema CLONE my_schema AT (OFFSET => -3600)") + self.validate_identity( + "CREATE TABLE restored_table CLONE my_table AT (TIMESTAMP => CAST('Sat, 09 May 2015 01:01:00 +0300' AS TIMESTAMPTZ))", + ) + self.validate_identity( + "CREATE DATABASE restored_db CLONE my_db BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" + ) + self.validate_identity( + "SELECT * FROM my_table AT (TIMESTAMP => TO_TIMESTAMP(1432669154242, 3))" + ) + self.validate_identity( + "SELECT * FROM my_table AT (OFFSET => -60 * 5) AS T WHERE T.flag = 'valid'" + ) + self.validate_identity( + "SELECT * FROM my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" + ) + self.validate_identity( + "SELECT * FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" + ) + self.validate_identity( + "SELECT * FROM my_table AT (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp)", + "SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPNTZ))", + ) + self.validate_identity( + "SELECT * FROM my_table AT(TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz)", + "SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))", + ) + self.validate_identity( + "SELECT * FROM my_table BEFORE (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz);", + "SELECT * FROM my_table BEFORE (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))", + ) + self.validate_identity( + """ + SELECT oldt.* , newt.* + FROM my_table BEFORE(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt + FULL OUTER JOIN my_table AT(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt + ON oldt.id = newt.id + WHERE oldt.id IS NULL OR newt.id IS NULL; + """, + "SELECT oldt.*, newt.* FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL", + ) + def test_ddl(self): self.validate_identity( """create external table et2( diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index fe37027..24f68f8 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -75,7 +75,7 @@ class TestSpark(Validator): col_a INTEGER, date VARCHAR ) -COMMENT='Test comment: blah' +COMMENT 'Test comment: blah' WITH ( PARTITIONED_BY=ARRAY['date'], FORMAT='ICEBERG', diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index 07179ef..a2569ab 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -20,6 +20,12 @@ class TestTSQL(Validator): self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)") self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0") + self.validate_all( + "SELECT TOP 1 * FROM (SELECT x FROM t1 UNION ALL SELECT x FROM t2) AS _l_0", + read={ + "": "SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1", + }, + ) self.validate_all( "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp", read={ -- cgit v1.2.3