From 3d48060515ba25b4c49d975a520ee0682327d1b7 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 16 Feb 2024 06:45:52 +0100 Subject: Merging upstream version 21.1.1. Signed-off-by: Daniel Baumann --- tests/dialects/test_bigquery.py | 266 +++++++++++++++++++--------------------- 1 file changed, 123 insertions(+), 143 deletions(-) (limited to 'tests/dialects/test_bigquery.py') diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 5cc5480..f231179 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -18,78 +18,6 @@ class TestBigQuery(Validator): maxDiff = None def test_bigquery(self): - self.validate_identity("ARRAY_AGG(x IGNORE NULLS LIMIT 1)") - self.validate_identity("ARRAY_AGG(x IGNORE NULLS ORDER BY x LIMIT 1)") - self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY x LIMIT 1)") - self.validate_identity("ARRAY_AGG(x IGNORE NULLS)") - self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS HAVING MAX x ORDER BY x LIMIT 1)") - - self.validate_all( - "SELECT SUM(x IGNORE NULLS) AS x", - read={ - "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", - "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", - "postgres": "SELECT SUM(x) IGNORE NULLS AS x", - "spark": "SELECT SUM(x) IGNORE NULLS AS x", - "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", - }, - write={ - "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", - "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", - "postgres": "SELECT SUM(x) IGNORE NULLS AS x", - "spark": "SELECT SUM(x) IGNORE NULLS AS x", - "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", - }, - ) - self.validate_all( - "SELECT SUM(x RESPECT NULLS) AS x", - read={ - "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", - "duckdb": "SELECT SUM(x RESPECT NULLS) AS x", - "postgres": "SELECT SUM(x) RESPECT NULLS AS x", - "spark": "SELECT SUM(x) RESPECT NULLS AS x", - "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", - }, - write={ - "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", - "duckdb": "SELECT SUM(x RESPECT NULLS) AS x", - "postgres": "SELECT SUM(x) RESPECT NULLS AS x", - "spark": "SELECT SUM(x) RESPECT NULLS AS x", - "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", - }, - ) - self.validate_all( - "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", - write={ - "bigquery": "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", - "duckdb": "SELECT QUANTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", - "spark": "SELECT PERCENTILE_CONT(x, 0.5) RESPECT NULLS OVER ()", - }, - ) - self.validate_all( - "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", - write={ - "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", - "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x", - "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x", - }, - ) - self.validate_all( - "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", - write={ - "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", - "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x", - "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x", - }, - ) - self.validate_identity("SELECT COUNT(x RESPECT NULLS)") - self.validate_identity("SELECT LAST_VALUE(x IGNORE NULLS) OVER y AS x") - - self.validate_identity( - "create or replace view test (tenant_id OPTIONS(description='Test description on 
table creation')) select 1 as tenant_id, 1 as customer_id;", - "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", - ) - with self.assertLogs(helper_logger) as cm: statements = parse( """ @@ -131,19 +59,12 @@ class TestBigQuery(Validator): self.validate_all( "a[0]", read={ + "bigquery": "a[0]", "duckdb": "a[1]", "presto": "a[1]", }, ) - self.validate_identity( - "select array_contains([1, 2, 3], 1)", - "SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)", - ) - self.validate_identity("CREATE SCHEMA x DEFAULT COLLATE 'en'") - self.validate_identity("CREATE TABLE x (y INT64) DEFAULT COLLATE 'en'") - self.validate_identity("PARSE_JSON('{}', wide_number_mode => 'exact')") - with self.assertRaises(TokenError): transpile("'\\'", read="bigquery") @@ -179,6 +100,16 @@ class TestBigQuery(Validator): ) assert "'END FOR'" in cm.output[0] + self.validate_identity("CREATE SCHEMA x DEFAULT COLLATE 'en'") + self.validate_identity("CREATE TABLE x (y INT64) DEFAULT COLLATE 'en'") + self.validate_identity("PARSE_JSON('{}', wide_number_mode => 'exact')") + self.validate_identity("FOO(values)") + self.validate_identity("STRUCT(values AS value)") + self.validate_identity("ARRAY_AGG(x IGNORE NULLS LIMIT 1)") + self.validate_identity("ARRAY_AGG(x IGNORE NULLS ORDER BY x LIMIT 1)") + self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY x LIMIT 1)") + self.validate_identity("ARRAY_AGG(x IGNORE NULLS)") + self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS HAVING MAX x ORDER BY x LIMIT 1)") self.validate_identity("SELECT * FROM dataset.my_table TABLESAMPLE SYSTEM (10 PERCENT)") self.validate_identity("TIME('2008-12-25 15:30:00+08')") self.validate_identity("TIME('2008-12-25 15:30:00+08', 'America/Los_Angeles')") @@ -237,6 +168,13 @@ class TestBigQuery(Validator): self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t") self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t") self.validate_identity("""SELECT JSON_EXTRACT_SCALAR('{"a": 5}', '$.a')""") + self.validate_identity("UPDATE x SET y = NULL") + self.validate_identity("LOG(n, b)") + self.validate_identity("SELECT COUNT(x RESPECT NULLS)") + self.validate_identity("SELECT LAST_VALUE(x IGNORE NULLS) OVER y AS x") + self.validate_identity( + "SELECT * FROM test QUALIFY a IS DISTINCT FROM b WINDOW c AS (PARTITION BY d)" + ) self.validate_identity( "FOR record IN (SELECT word, word_count FROM bigquery-public-data.samples.shakespeare LIMIT 5) DO SELECT record.word, record.word_count" ) @@ -264,6 +202,14 @@ class TestBigQuery(Validator): self.validate_identity( """SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')""" ) + self.validate_identity( + "select array_contains([1, 2, 3], 1)", + "SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)", + ) + self.validate_identity( + "create or replace view test (tenant_id OPTIONS(description='Test description on table creation')) select 1 as tenant_id, 1 as customer_id;", + "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", + ) self.validate_identity( "SELECT SPLIT(foo)", "SELECT SPLIT(foo, ',')", @@ -312,7 +258,81 @@ class TestBigQuery(Validator): "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET", "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset", ) + 
self.validate_identity( + "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))", + "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))", + ) + self.validate_identity( + r"REGEXP_EXTRACT(svc_plugin_output, r'\\\((.*)')", + r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')", + ) + self.validate_all( + "TIMESTAMP(x)", + write={ + "bigquery": "TIMESTAMP(x)", + "duckdb": "CAST(x AS TIMESTAMPTZ)", + "presto": "CAST(x AS TIMESTAMP WITH TIME ZONE)", + }, + ) + self.validate_all( + "SELECT SUM(x IGNORE NULLS) AS x", + read={ + "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", + "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", + "postgres": "SELECT SUM(x) IGNORE NULLS AS x", + "spark": "SELECT SUM(x) IGNORE NULLS AS x", + "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", + }, + write={ + "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", + "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", + "postgres": "SELECT SUM(x) IGNORE NULLS AS x", + "spark": "SELECT SUM(x) IGNORE NULLS AS x", + "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", + }, + ) + self.validate_all( + "SELECT SUM(x RESPECT NULLS) AS x", + read={ + "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", + "duckdb": "SELECT SUM(x RESPECT NULLS) AS x", + "postgres": "SELECT SUM(x) RESPECT NULLS AS x", + "spark": "SELECT SUM(x) RESPECT NULLS AS x", + "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", + }, + write={ + "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", + "duckdb": "SELECT SUM(x RESPECT NULLS) AS x", + "postgres": "SELECT SUM(x) RESPECT NULLS AS x", + "spark": "SELECT SUM(x) RESPECT NULLS AS x", + "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", + }, + ) + self.validate_all( + "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", + write={ + "bigquery": "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", + "duckdb": "SELECT QUANTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", + "spark": "SELECT PERCENTILE_CONT(x, 0.5) RESPECT NULLS OVER ()", + }, + ) + self.validate_all( + "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", + write={ + "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", + "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x", + "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x", + }, + ) + self.validate_all( + "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", + write={ + "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", + "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x", + "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x", + }, + ) self.validate_all( "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))", read={ @@ -464,7 +484,6 @@ class TestBigQuery(Validator): "duckdb": "SELECT * FROM t WHERE EXISTS(SELECT * FROM UNNEST(nums) AS _t(x) WHERE x > 1)", }, ) - self.validate_identity("UPDATE x SET y = NULL") self.validate_all( "NULL", read={ @@ -620,6 +639,14 @@ class TestBigQuery(Validator): "spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte" }, ) + self.validate_all( + "SELECT IF(pos = pos_2, col, NULL) AS col FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], []))) - 1)) AS 
pos CROSS JOIN UNNEST(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1) AND pos_2 = (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1))", + read={"spark": "select explode_outer([])"}, + ) + self.validate_all( + "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2 FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], []))) - 1)) AS pos CROSS JOIN UNNEST(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1) AND pos_2 = (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1))", + read={"spark": "select posexplode_outer([])"}, + ) self.validate_all( "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z", write={ @@ -660,10 +687,6 @@ class TestBigQuery(Validator): "bigquery": "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)", }, ) - self.validate_identity( - r"REGEXP_EXTRACT(svc_plugin_output, r'\\\((.*)')", - r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')", - ) self.validate_all( "REGEXP_CONTAINS('foo', '.*')", read={ @@ -986,9 +1009,6 @@ class TestBigQuery(Validator): "postgres": "CURRENT_DATE AT TIME ZONE 'UTC'", }, ) - self.validate_identity( - "SELECT * FROM test QUALIFY a IS DISTINCT FROM b WINDOW c AS (PARTITION BY d)" - ) self.validate_all( "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", write={ @@ -997,45 +1017,20 @@ class TestBigQuery(Validator): }, ) self.validate_all( - "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", - write={ - "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)", + "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + read={ "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", - }, - ) - self.validate_all( - "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab", - write={ - "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])", - }, - ) - self.validate_all( - "SELECT cola, colb FROM (VALUES (1, 'test'))", - write={ - "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])", + "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)", }, ) self.validate_all( "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", read={ + "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", "postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", }, ) - self.validate_all( - "SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)", - write={ - "spark": "SELECT cola, colb, colc FROM VALUES (1, 'test', NULL) AS tab(cola, colb, colc)", - "bigquery": "SELECT cola, colb, colc FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb, NULL AS colc)])", - "snowflake": "SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)", - }, - ) - self.validate_all( - "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))", - write={ - "bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, 
COUNT(*) AS e FOR c IN ('x', 'y'))", - }, - ) self.validate_all( "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table", write={ @@ -1091,8 +1086,6 @@ WHERE pretty=True, ) - self.validate_identity("LOG(n, b)") - def test_user_defined_functions(self): self.validate_identity( "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 NOT DETERMINISTIC LANGUAGE js AS 'return x*y;'" @@ -1114,35 +1107,22 @@ WHERE ) def test_remove_precision_parameterized_types(self): - self.validate_all( - "SELECT CAST(1 AS NUMERIC(10, 2))", - write={ - "bigquery": "SELECT CAST(1 AS NUMERIC)", - }, + self.validate_identity("CREATE TABLE test (a NUMERIC(10, 2))") + self.validate_identity( + "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING(10)), CAST(14 AS STRING(10)))", + "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING), CAST(14 AS STRING))", ) - self.validate_all( - "CREATE TABLE test (a NUMERIC(10, 2))", - write={ - "bigquery": "CREATE TABLE test (a NUMERIC(10, 2))", - }, + self.validate_identity( + "SELECT CAST(1 AS NUMERIC(10, 2))", + "SELECT CAST(1 AS NUMERIC)", ) - self.validate_all( + self.validate_identity( "SELECT CAST('1' AS STRING(10)) UNION ALL SELECT CAST('2' AS STRING(10))", - write={ - "bigquery": "SELECT CAST('1' AS STRING) UNION ALL SELECT CAST('2' AS STRING)", - }, + "SELECT CAST('1' AS STRING) UNION ALL SELECT CAST('2' AS STRING)", ) - self.validate_all( + self.validate_identity( "SELECT cola FROM (SELECT CAST('1' AS STRING(10)) AS cola UNION ALL SELECT CAST('2' AS STRING(10)) AS cola)", - write={ - "bigquery": "SELECT cola FROM (SELECT CAST('1' AS STRING) AS cola UNION ALL SELECT CAST('2' AS STRING) AS cola)", - }, - ) - self.validate_all( - "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING(10)), CAST(14 AS STRING(10)))", - write={ - "bigquery": "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING), CAST(14 AS STRING))", - }, + "SELECT cola FROM (SELECT CAST('1' AS STRING) AS cola UNION ALL SELECT CAST('2' AS STRING) AS cola)", ) def test_models(self): -- cgit v1.2.3
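
The hunks above exercise two helpers from sqlglot's test Validator:
validate_identity(sql[, expected]) asserts that BigQuery SQL survives a
parse/generate round trip (optionally normalizing to `expected`), while
validate_all(sql, read=..., write=...) asserts cross-dialect transpilation
in each direction. Below is a minimal standalone sketch of the same
expectations using sqlglot's public transpile() function; it assumes
transpile() reproduces the Validator helpers' behavior, and every expected
string is taken verbatim from the hunks in this patch (version 21.1.1).

    import sqlglot

    # Identity check: BigQuery SQL round-trips unchanged through
    # parse + generate (cf. validate_identity above).
    sql = "ARRAY_AGG(x IGNORE NULLS LIMIT 1)"
    assert sqlglot.transpile(sql, read="bigquery", write="bigquery")[0] == sql

    # Cross-dialect check: BigQuery writes IGNORE NULLS inside the call,
    # Spark appends it after the call (cf. the SUM(x IGNORE NULLS) case).
    assert (
        sqlglot.transpile(
            "SELECT SUM(x IGNORE NULLS) AS x", read="bigquery", write="spark"
        )[0]
        == "SELECT SUM(x) IGNORE NULLS AS x"
    )

    # Index-base conversion: DuckDB and Presto arrays are 1-indexed,
    # BigQuery subscripts are 0-indexed (cf. the a[0]/a[1] case).
    assert sqlglot.transpile("a[1]", read="duckdb", write="bigquery")[0] == "a[0]"

    # Type normalization: BigQuery drops precision parameters from types
    # inside CAST (cf. test_remove_precision_parameterized_types).
    assert (
        sqlglot.transpile(
            "SELECT CAST(1 AS NUMERIC(10, 2))", read="bigquery", write="bigquery"
        )[0]
        == "SELECT CAST(1 AS NUMERIC)"
    )

transpile() returns one generated string per input statement, hence the [0].
Each assert mirrors an expectation that this patch moves or rewrites rather
than changes, so the sketch should hold on both sides of the merge.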