From 38e6461a8afbd7cb83709ddb998f03d40ba87755 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 23 Jan 2024 06:06:14 +0100 Subject: Merging upstream version 20.9.0. Signed-off-by: Daniel Baumann --- tests/dialects/test_snowflake.py | 211 +++++++++++++++++++++++++++++++++------ 1 file changed, 183 insertions(+), 28 deletions(-) (limited to 'tests/dialects/test_snowflake.py') diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 4d8168a..0882290 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -10,6 +10,9 @@ class TestSnowflake(Validator): dialect = "snowflake" def test_snowflake(self): + self.validate_identity( + "INSERT OVERWRITE TABLE t SELECT 1", "INSERT OVERWRITE INTO t SELECT 1" + ) self.validate_identity("SELECT rename, replace") expr = parse_one("SELECT APPROX_TOP_K(C4, 3, 5) FROM t") expr.selects[0].assert_is(exp.AggFunc) @@ -36,9 +39,12 @@ WHERE )""", ) + self.validate_identity("RM @parquet_stage") + self.validate_identity("REMOVE @parquet_stage") + self.validate_identity("SELECT TIMESTAMP_FROM_PARTS(d, t)") + self.validate_identity("SELECT GET_PATH(v, 'attr[0].name') FROM vartab") self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))") self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))") - self.validate_identity("SELECT user_id, value FROM table_name sample ($s) SEED (0)") self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)") self.validate_identity("SELECT OBJECT_CONSTRUCT()") self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())") @@ -74,6 +80,49 @@ WHERE self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) + self.validate_identity( + "SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)" + ) + self.validate_identity( + """SELECT PARSE_JSON('{"x": "hello"}'):x LIKE 'hello'""", + """SELECT GET_PATH(PARSE_JSON('{"x": "hello"}'), 'x') LIKE 'hello'""", + ) + self.validate_identity( + """SELECT data:x LIKE 'hello' FROM some_table""", + """SELECT GET_PATH(data, 'x') LIKE 'hello' FROM some_table""", + ) + self.validate_identity( + "SELECT SUM({ fn CONVERT(123, SQL_DOUBLE) })", + "SELECT SUM(CAST(123 AS DOUBLE))", + ) + self.validate_identity( + "SELECT SUM({ fn CONVERT(123, SQL_VARCHAR) })", + "SELECT SUM(CAST(123 AS VARCHAR))", + ) + self.validate_identity( + "SELECT TIMESTAMPFROMPARTS(d, t)", + "SELECT TIMESTAMP_FROM_PARTS(d, t)", + ) + self.validate_identity( + "SELECT user_id, value FROM table_name SAMPLE ($s) SEED (0)", + "SELECT user_id, value FROM table_name TABLESAMPLE ($s) SEED (0)", + ) + self.validate_identity( + "SELECT v:attr[0].name FROM vartab", + "SELECT GET_PATH(v, 'attr[0].name') FROM vartab", + ) + self.validate_identity( + 'SELECT v:"fruit" FROM vartab', + """SELECT GET_PATH(v, '"fruit"') FROM vartab""", + ) + self.validate_identity( + "v:attr[0]:name", + "GET_PATH(GET_PATH(v, 'attr[0]'), 'name')", + ) + self.validate_identity( + """SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""", + """SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""", + ) self.validate_identity( "SELECT * FROM foo at", "SELECT * FROM foo AS at", @@ -146,6 +195,80 @@ WHERE "CAST(x AS VARCHAR)", ) + self.validate_all( + "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", + read={ + "bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])", + "duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + }, + write={ + "bigquery": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + "duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + "snowflake": "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", + }, + ) + self.validate_all( + "SELECT * FROM example TABLESAMPLE (3) SEED (82)", + read={ + "databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + }, + write={ + "databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "snowflake": "SELECT * FROM example TABLESAMPLE (3) SEED (82)", + }, + ) + self.validate_all( + "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", + write={ + "duckdb": "SELECT MAKE_TIME(12, 34, 56 + (987654321 / 1000000000.0))", + "snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", + }, + ) + self.validate_all( + "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", + read={ + "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", + }, + write={ + "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", + "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", + }, + ) + self.validate_all( + """WITH vartab(v) AS (select parse_json('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", + write={ + "bigquery": """WITH vartab AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]') AS v) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "duckdb": """WITH vartab(v) AS (SELECT JSON('[{"attr": [{"name": "banana"}]}]')) SELECT v -> '$[0].attr[0].name' FROM vartab""", + "mysql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('[{"attr": [{"name": "banana"}]}]')) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", + "tsql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_VALUE(v, '$[0].attr[0].name') FROM vartab""", + }, + ) + self.validate_all( + """WITH vartab(v) AS (select parse_json('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", + write={ + "bigquery": """WITH vartab AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}') AS v) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "duckdb": """WITH vartab(v) AS (SELECT JSON('{"attr": [{"name": "banana"}]}')) SELECT v -> '$.attr[0].name' FROM vartab""", + "mysql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('{"attr": [{"name": "banana"}]}')) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", + "tsql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_VALUE(v, '$.attr[0].name') FROM vartab""", + }, + ) + self.validate_all( + """SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""", + write={ + "bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""", + "duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""", + "mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""", + "presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""", + "snowflake": """SELECT GET_PATH(PARSE_JSON('{"fruit":"banana"}'), 'fruit')""", + "tsql": """SELECT JSON_VALUE('{"fruit":"banana"}', '$.fruit')""", + }, + ) self.validate_all( "SELECT TO_ARRAY(['test'])", write={ @@ -162,7 +285,7 @@ WHERE ) self.validate_all( # We need to qualify the columns in this query because "value" would be ambiguous - 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t, TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', + 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', read={ "duckdb": 'WITH t(x, "value") AS (SELECT [1,2,3], 1) SELECT UNNEST(t.x) AS "value" FROM t', }, @@ -172,7 +295,7 @@ WHERE write={ "duckdb": "SELECT {'Manitoba': 'Winnipeg', 'foo': 'bar'} AS province_capital", "snowflake": "SELECT OBJECT_CONSTRUCT('Manitoba', 'Winnipeg', 'foo', 'bar') AS province_capital", - "spark": "SELECT STRUCT('Manitoba' AS Winnipeg, 'foo' AS bar) AS province_capital", + "spark": "SELECT STRUCT('Winnipeg' AS Manitoba, 'bar' AS foo) AS province_capital", }, ) self.validate_all( @@ -418,13 +541,11 @@ WHERE }, ) self.validate_all( - 'x:a:"b c"', + '''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''', write={ - "duckdb": "x['a']['b c']", - "hive": "x['a']['b c']", - "presto": "x['a']['b c']", - "snowflake": "x['a']['b c']", - "spark": "x['a']['b c']", + "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""", + "mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""", + "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '"b c"')""", }, ) self.validate_all( @@ -457,6 +578,14 @@ WHERE "spark": "SELECT TIMESTAMP_MILLIS(1659981729000)", }, ) + self.validate_all( + "SELECT TO_TIMESTAMP(16599817290000, 4)", + write={ + "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(16599817290000 / POW(10, 4) AS INT64))", + "snowflake": "SELECT TO_TIMESTAMP(16599817290000, 4)", + "spark": "SELECT TIMESTAMP_SECONDS(16599817290000 / POW(10, 4))", + }, + ) self.validate_all( "SELECT TO_TIMESTAMP('1659981729')", write={ @@ -467,11 +596,11 @@ WHERE self.validate_all( "SELECT TO_TIMESTAMP(1659981729000000000, 9)", write={ - "bigquery": "SELECT TIMESTAMP_MICROS(CAST(1659981729000000000 / 1000 AS INT64))", - "duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / 1000000000)", - "presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / 1000000000)", + "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(1659981729000000000 / POW(10, 9) AS INT64))", + "duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / POW(10, 9))", + "presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / POW(10, 9))", "snowflake": "SELECT TO_TIMESTAMP(1659981729000000000, 9)", - "spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / 1000000000)", + "spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / POW(10, 9))", }, ) self.validate_all( @@ -660,7 +789,7 @@ WHERE ) self.validate_identity( "SELECT parse_json($1):a.b FROM @mystage2/data1.json.gz", - "SELECT PARSE_JSON($1)['a'].b FROM @mystage2/data1.json.gz", + "SELECT GET_PATH(PARSE_JSON($1), 'a.b') FROM @mystage2/data1.json.gz", ) self.validate_identity( "SELECT * FROM @mystage t (c1)", @@ -676,15 +805,24 @@ WHERE self.validate_identity("SELECT * FROM testtable TABLESAMPLE (100)") self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)") self.validate_identity("SELECT * FROM testtable TABLESAMPLE (10 ROWS)") - self.validate_identity("SELECT * FROM testtable SAMPLE (10)") - self.validate_identity("SELECT * FROM testtable SAMPLE ROW (0)") - self.validate_identity("SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)") self.validate_identity( "SELECT i, j FROM table1 AS t1 INNER JOIN table2 AS t2 TABLESAMPLE (50) WHERE t2.j = t1.i" ) self.validate_identity( "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)" ) + self.validate_identity( + "SELECT * FROM testtable SAMPLE (10)", + "SELECT * FROM testtable TABLESAMPLE (10)", + ) + self.validate_identity( + "SELECT * FROM testtable SAMPLE ROW (0)", + "SELECT * FROM testtable TABLESAMPLE ROW (0)", + ) + self.validate_identity( + "SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)", + "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)", + ) self.validate_all( """ @@ -695,20 +833,20 @@ WHERE table2 AS t2 SAMPLE (50) -- 50% of rows in table2 WHERE t2.j = t1.i""", write={ - "snowflake": "SELECT i, j FROM table1 AS t1 SAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 SAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i", + "snowflake": "SELECT i, j FROM table1 AS t1 TABLESAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 TABLESAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i", }, ) self.validate_all( "SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)", write={ - "snowflake": "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)", + "snowflake": "SELECT * FROM testtable TABLESAMPLE BLOCK (0.012) SEED (99992)", }, ) self.validate_all( "SELECT * FROM (SELECT * FROM t1 join t2 on t1.a = t2.c) SAMPLE (1)", write={ - "snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1)", - "spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1 PERCENT)", + "snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)", + "spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1 PERCENT)", }, ) @@ -791,6 +929,11 @@ WHERE "DATEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))", ) + self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") + self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") + self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)") + self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)") + def test_semi_structured_types(self): self.validate_identity("SELECT CAST(a AS VARIANT)") self.validate_identity("SELECT CAST(a AS ARRAY)") @@ -876,7 +1019,7 @@ WHERE location=@s2/logs/ partition_type = user_specified file_format = (type = parquet)""", - "CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL1'] AS DATE)), col2 VARCHAR AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL2'] AS VARCHAR)), col3 DECIMAL AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL3'] AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)", + "CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL1') AS DATE)), col2 VARCHAR AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL2') AS VARCHAR)), col3 DECIMAL AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL3') AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)", ) self.validate_identity("CREATE OR REPLACE VIEW foo (uid) COPY GRANTS AS (SELECT 1)") self.validate_identity("CREATE TABLE geospatial_table (id INT, g GEOGRAPHY)") @@ -1092,9 +1235,9 @@ FROM cs.telescope.dag_report, TABLE(FLATTEN(input => SPLIT(operators, ','))) AS "snowflake": """SELECT id AS "ID", f.value AS "Contact", - f1.value['type'] AS "Type", - f1.value['content'] AS "Details" -FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => f.value['business']) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", + GET_PATH(f1.value, 'type') AS "Type", + GET_PATH(f1.value, 'content') AS "Details" +FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => GET_PATH(f.value, 'business')) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", }, pretty=True, ) @@ -1306,10 +1449,9 @@ MATCH_RECOGNIZE ( def test_show(self): # Parsed as Command - self.validate_identity("SHOW COLUMNS IN TABLE dt_test") self.validate_identity("SHOW TABLES LIKE 'line%' IN tpch.public") - ast = parse_one("SHOW TABLES HISTORY IN tpch.public") + ast = parse_one("SHOW TABLES HISTORY IN tpch.public", read="snowflake") self.assertIsInstance(ast, exp.Command) # Parsed as Show @@ -1331,9 +1473,22 @@ MATCH_RECOGNIZE ( ast = parse_one('SHOW PRIMARY KEYS IN "TEST"."PUBLIC"."customers"', read="snowflake") table = ast.find(exp.Table) - self.assertIsNotNone(table) self.assertEqual(table.sql(dialect="snowflake"), '"TEST"."PUBLIC"."customers"') + self.validate_identity("SHOW COLUMNS") + self.validate_identity("SHOW COLUMNS IN TABLE dt_test") + self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN TABLE dt_test") + self.validate_identity("SHOW COLUMNS IN VIEW") + self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN VIEW dt_test") + + ast = parse_one("SHOW COLUMNS LIKE '_testing%' IN dt_test", read="snowflake") + table = ast.find(exp.Table) + literal = ast.find(exp.Literal) + + self.assertEqual(table.sql(dialect="snowflake"), "dt_test") + + self.assertEqual(literal.sql(dialect="snowflake"), "'_testing%'") + def test_swap(self): ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake") assert isinstance(ast, exp.AlterTable) -- cgit v1.2.3