from unittest import mock
from sqlglot import (
ErrorLevel,
ParseError,
TokenError,
UnsupportedError,
parse,
transpile,
)
from sqlglot.helper import logger as helper_logger
from tests.dialects.test_dialect import Validator
class TestBigQuery(Validator):
dialect = "bigquery"
maxDiff = None
def test_bigquery(self):
with self.assertLogs(helper_logger) as cm:
self.validate_all(
"SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]",
write={
"duckdb": "SELECT a[2], b[2], c[1], d[2], e[1]",
"bigquery": "SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]",
"presto": "SELECT a[2], b[2], c[1], ELEMENT_AT(d, 2), ELEMENT_AT(e, 1)",
},
)
self.validate_all(
"a[0]",
read={
"duckdb": "a[1]",
"presto": "a[1]",
},
)
self.validate_identity(
"select array_contains([1, 2, 3], 1)",
"SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)",
)
self.validate_identity("CREATE SCHEMA x DEFAULT COLLATE 'en'")
self.validate_identity("CREATE TABLE x (y INT64) DEFAULT COLLATE 'en'")
self.validate_identity("PARSE_JSON('{}', wide_number_mode => 'exact')")
with self.assertRaises(TokenError):
transpile("'\\'", read="bigquery")
# Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators
with self.assertRaises(UnsupportedError):
transpile(
"SELECT * FROM a INTERSECT ALL SELECT * FROM b",
write="bigquery",
unsupported_level=ErrorLevel.RAISE,
)
with self.assertRaises(UnsupportedError):
transpile(
"SELECT * FROM a EXCEPT ALL SELECT * FROM b",
write="bigquery",
unsupported_level=ErrorLevel.RAISE,
)
with self.assertRaises(ParseError):
transpile("SELECT * FROM UNNEST(x) AS x(y)", read="bigquery")
with self.assertRaises(ParseError):
transpile("DATE_ADD(x, day)", read="bigquery")
for_in_stmts = parse(
"FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word; END FOR;",
read="bigquery",
)
self.assertEqual(
[s.sql(dialect="bigquery") for s in for_in_stmts],
["FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word", "END FOR"],
)
self.validate_identity("SELECT test.Unknown FROM test")
self.validate_identity(r"SELECT '\n\r\a\v\f\t'")
self.validate_identity("SELECT * FROM tbl FOR SYSTEM_TIME AS OF z")
self.validate_identity("STRING_AGG(DISTINCT a ORDER BY b DESC, c DESC LIMIT 10)")
self.validate_identity("SELECT PARSE_TIMESTAMP('%c', 'Thu Dec 25 07:30:00 2008', 'UTC')")
self.validate_identity("SELECT ANY_VALUE(fruit HAVING MAX sold) FROM fruits")
self.validate_identity("SELECT ANY_VALUE(fruit HAVING MIN sold) FROM fruits")
self.validate_identity("SELECT `project-id`.udfs.func(call.dir)")
self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day")
self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')")
self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')")
self.validate_identity("CAST(STRUCT(1) AS STRUCT)")
self.validate_identity("STRING_AGG(a)")
self.validate_identity("STRING_AGG(a, ' & ')")
self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
self.validate_identity("STRING_AGG(a, ' & ' ORDER BY LENGTH(a))")
self.validate_identity("DATE(2016, 12, 25)")
self.validate_identity("DATE(CAST('2016-12-25 23:59:59' AS DATETIME))")
self.validate_identity("SELECT foo IN UNNEST(bar) AS bla")
self.validate_identity("SELECT * FROM x-0.a")
self.validate_identity("SELECT * FROM pivot CROSS JOIN foo")
self.validate_identity("SAFE_CAST(x AS STRING)")
self.validate_identity("SELECT * FROM a-b-c.mydataset.mytable")
self.validate_identity("SELECT * FROM abc-def-ghi")
self.validate_identity("SELECT * FROM a-b-c")
self.validate_identity("SELECT * FROM my-table")
self.validate_identity("SELECT * FROM my-project.mydataset.mytable")
self.validate_identity("SELECT * FROM pro-ject_id.c.d CROSS JOIN foo-bar")
self.validate_identity("SELECT * FROM foo.bar.25", "SELECT * FROM foo.bar.`25`")
self.validate_identity("SELECT * FROM foo.bar.25_", "SELECT * FROM foo.bar.`25_`")
self.validate_identity("SELECT * FROM foo.bar.25x a", "SELECT * FROM foo.bar.`25x` AS a")
self.validate_identity("SELECT * FROM foo.bar.25ab c", "SELECT * FROM foo.bar.`25ab` AS c")
self.validate_identity("x <> ''")
self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
self.validate_identity("SELECT b'abc'")
self.validate_identity("""SELECT * FROM UNNEST(ARRAY>[])""")
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
self.validate_identity("SELECT STRUCT>(['2023-01-17'])")
self.validate_identity("SELECT STRUCT((SELECT a FROM b.c LIMIT 1)).*")
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
self.validate_identity("""CREATE TABLE x (a STRUCT>)""")
self.validate_identity("""CREATE TABLE x (a STRUCT)""")
self.validate_identity("CAST(x AS TIMESTAMP)")
self.validate_identity("REGEXP_EXTRACT(`foo`, 'bar: (.+?)', 1, 1)")
self.validate_identity("BEGIN A B C D E F")
self.validate_identity("BEGIN TRANSACTION")
self.validate_identity("COMMIT TRANSACTION")
self.validate_identity("ROLLBACK TRANSACTION")
self.validate_identity("CAST(x AS BIGNUMERIC)")
self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1")
self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t")
self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t")
self.validate_identity(
"FOR record IN (SELECT word, word_count FROM bigquery-public-data.samples.shakespeare LIMIT 5) DO SELECT record.word, record.word_count"
)
self.validate_identity(
"DATE(CAST('2016-12-25 05:30:00+07' AS DATETIME), 'America/Los_Angeles')"
)
self.validate_identity(
"""CREATE TABLE x (a STRING OPTIONS (description='x')) OPTIONS (table_expiration_days=1)"""
)
self.validate_identity(
"SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))"
)
self.validate_identity(
"CREATE TABLE IF NOT EXISTS foo AS SELECT * FROM bla EXCEPT DISTINCT (SELECT * FROM bar) LIMIT 0"
)
self.validate_identity(
"SELECT ROW() OVER (y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM x WINDOW y AS (PARTITION BY CATEGORY)"
)
self.validate_identity(
"SELECT item, purchases, LAST_VALUE(item) OVER (item_window ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce WINDOW item_window AS (ORDER BY purchases)"
)
self.validate_identity(
"SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)",
)
self.validate_identity(
"SELECT a overlaps",
"SELECT a AS overlaps",
)
self.validate_identity(
"SELECT y + 1 z FROM x GROUP BY y + 1 ORDER BY z",
"SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z",
)
self.validate_identity(
"SELECT y + 1 z FROM x GROUP BY y + 1",
"SELECT y + 1 AS z FROM x GROUP BY y + 1",
)
self.validate_identity(
"""SELECT JSON '"foo"' AS json_data""",
"""SELECT PARSE_JSON('"foo"') AS json_data""",
)
self.validate_identity(
"CREATE OR REPLACE TABLE `a.b.c` CLONE `a.b.d`",
"CREATE OR REPLACE TABLE a.b.c CLONE a.b.d",
)
self.validate_identity(
"SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
"SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
)
self.validate_all("SELECT SPLIT(foo)", write={"bigquery": "SELECT SPLIT(foo, ',')"})
self.validate_all("SELECT 1 AS hash", write={"bigquery": "SELECT 1 AS `hash`"})
self.validate_all("SELECT 1 AS at", write={"bigquery": "SELECT 1 AS `at`"})
self.validate_all('x <> ""', write={"bigquery": "x <> ''"})
self.validate_all('x <> """"""', write={"bigquery": "x <> ''"})
self.validate_all("x <> ''''''", write={"bigquery": "x <> ''"})
self.validate_all("CAST(x AS DATETIME)", read={"": "x::timestamp"})
self.validate_all(
"SELECT TIMESTAMP_MICROS(x)",
read={
"duckdb": "SELECT MAKE_TIMESTAMP(x)",
"spark": "SELECT TIMESTAMP_MICROS(x)",
},
write={
"bigquery": "SELECT TIMESTAMP_MICROS(x)",
"duckdb": "SELECT MAKE_TIMESTAMP(x)",
"snowflake": "SELECT TO_TIMESTAMP(x / 1000, 3)",
"spark": "SELECT TIMESTAMP_MICROS(x)",
},
)
self.validate_all(
"SELECT * FROM t WHERE EXISTS(SELECT * FROM unnest(nums) AS x WHERE x > 1)",
write={
"bigquery": "SELECT * FROM t WHERE EXISTS(SELECT * FROM UNNEST(nums) AS x WHERE x > 1)",
"duckdb": "SELECT * FROM t WHERE EXISTS(SELECT * FROM UNNEST(nums) AS _t(x) WHERE x > 1)",
},
)
self.validate_all(
"NULL",
read={
"duckdb": "NULL = a",
"postgres": "a = NULL",
},
)
self.validate_all(
"SELECT '\\n'",
read={
"bigquery": "SELECT '''\n'''",
},
write={
"bigquery": "SELECT '\\n'",
"postgres": "SELECT '\n'",
},
)
self.validate_all(
"TRIM(item, '*')",
read={
"snowflake": "TRIM(item, '*')",
"spark": "TRIM('*', item)",
},
write={
"bigquery": "TRIM(item, '*')",
"snowflake": "TRIM(item, '*')",
"spark": "TRIM('*' FROM item)",
},
)
self.validate_all(
"CREATE OR REPLACE TABLE `a.b.c` COPY `a.b.d`",
write={
"bigquery": "CREATE OR REPLACE TABLE a.b.c COPY a.b.d",
"snowflake": "CREATE OR REPLACE TABLE a.b.c CLONE a.b.d",
},
)
self.validate_all(
"SELECT DATETIME_DIFF('2023-01-01T00:00:00', '2023-01-01T05:00:00', MILLISECOND)",
write={
"bigquery": "SELECT DATETIME_DIFF('2023-01-01T00:00:00', '2023-01-01T05:00:00', MILLISECOND)",
"databricks": "SELECT TIMESTAMPDIFF(MILLISECOND, '2023-01-01T05:00:00', '2023-01-01T00:00:00')",
},
),
self.validate_all(
"SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
"bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
"databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1, '2023-01-01T00:00:00')",
},
),
self.validate_all(
"SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
"bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
"databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1 * -1, '2023-01-01T00:00:00')",
},
),
self.validate_all(
"SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
write={
"bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
"databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')",
},
),
self.validate_all("LEAST(x, y)", read={"sqlite": "MIN(x, y)"})
self.validate_all("CAST(x AS CHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all("CAST(x AS NCHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all("CAST(x AS NVARCHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all("CAST(x AS TIMESTAMPTZ)", write={"bigquery": "CAST(x AS TIMESTAMP)"})
self.validate_all("CAST(x AS RECORD)", write={"bigquery": "CAST(x AS STRUCT)"})
self.validate_all(
'SELECT TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
"bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
"databricks": "SELECT DATEADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
"mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
"spark": "SELECT DATEADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
},
)
self.validate_all(
'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
"bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
"mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
},
)
self.validate_all(
"MD5(x)",
write={
"": "MD5_DIGEST(x)",
"bigquery": "MD5(x)",
"hive": "UNHEX(MD5(x))",
"spark": "UNHEX(MD5(x))",
},
)
self.validate_all(
"SELECT TO_HEX(MD5(some_string))",
read={
"duckdb": "SELECT MD5(some_string)",
"spark": "SELECT MD5(some_string)",
},
write={
"": "SELECT MD5(some_string)",
"bigquery": "SELECT TO_HEX(MD5(some_string))",
"duckdb": "SELECT MD5(some_string)",
},
)
self.validate_all(
"SHA256(x)",
write={
"bigquery": "SHA256(x)",
"spark2": "SHA2(x, 256)",
},
)
self.validate_all(
"SHA512(x)",
write={
"bigquery": "SHA512(x)",
"spark2": "SHA2(x, 512)",
},
)
self.validate_all(
"SELECT CAST('20201225' AS TIMESTAMP FORMAT 'YYYYMMDD' AT TIME ZONE 'America/New_York')",
write={"bigquery": "SELECT PARSE_TIMESTAMP('%Y%m%d', '20201225', 'America/New_York')"},
)
self.validate_all(
"SELECT CAST('20201225' AS TIMESTAMP FORMAT 'YYYYMMDD')",
write={"bigquery": "SELECT PARSE_TIMESTAMP('%Y%m%d', '20201225')"},
)
self.validate_all(
"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM') AS date_time_to_string",
write={
"bigquery": "SELECT CAST(CAST('2008-12-25 00:00:00+00:00' AS TIMESTAMP) AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM') AS date_time_to_string",
},
)
self.validate_all(
"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string",
write={
"bigquery": "SELECT CAST(CAST('2008-12-25 00:00:00+00:00' AS TIMESTAMP) AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string",
},
)
self.validate_all(
"WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))",
read={
"spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte"
},
)
self.validate_all(
"SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
write={
"": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
"bigquery": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
"duckdb": "SELECT {'y': ARRAY(SELECT {'b': b} FROM x)} FROM z",
},
)
self.validate_all(
"cast(x as date format 'MM/DD/YYYY')",
write={
"bigquery": "PARSE_DATE('%m/%d/%Y', x)",
},
)
self.validate_all(
"cast(x as time format 'YYYY.MM.DD HH:MI:SSTZH')",
write={
"bigquery": "PARSE_TIMESTAMP('%Y.%m.%d %I:%M:%S%z', x)",
},
)
self.validate_all(
"CREATE TEMP TABLE foo AS SELECT 1",
write={"bigquery": "CREATE TEMPORARY TABLE foo AS SELECT 1"},
)
self.validate_all(
"SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`",
write={
"bigquery": "SELECT * FROM SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW",
},
)
self.validate_all(
"SELECT * FROM `my-project.my-dataset.my-table`",
write={"bigquery": "SELECT * FROM `my-project`.`my-dataset`.`my-table`"},
)
self.validate_all(
"SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)",
write={
"bigquery": "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)",
},
)
self.validate_identity(
r"REGEXP_EXTRACT(svc_plugin_output, r'\\\((.*)')",
r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')",
)
self.validate_all(
"REGEXP_CONTAINS('foo', '.*')",
read={
"bigquery": "REGEXP_CONTAINS('foo', '.*')",
"mysql": "REGEXP_LIKE('foo', '.*')",
"starrocks": "REGEXP('foo', '.*')",
},
write={
"mysql": "REGEXP_LIKE('foo', '.*')",
"starrocks": "REGEXP('foo', '.*')",
},
)
self.validate_all(
'"""x"""',
write={
"bigquery": "'x'",
"duckdb": "'x'",
"presto": "'x'",
"hive": "'x'",
"spark": "'x'",
},
)
self.validate_all(
'"""x\'"""',
write={
"bigquery": "'x\\''",
"duckdb": "'x'''",
"presto": "'x'''",
"hive": "'x\\''",
"spark": "'x\\''",
},
)
self.validate_all(
"r'x\\''",
write={
"bigquery": "'x\\''",
"hive": "'x\\''",
},
)
self.validate_all(
"r'x\\y'",
write={
"bigquery": "'x\\\\y'",
"hive": "'x\\\\y'",
},
)
self.validate_all(
"'\\\\'",
write={
"bigquery": r"'\\'",
"duckdb": r"'\\'",
"presto": r"'\\'",
"hive": r"'\\'",
},
)
self.validate_all(
r'r"""/\*.*\*/"""',
write={
"bigquery": r"'/\\*.*\\*/'",
"duckdb": r"'/\\*.*\\*/'",
"presto": r"'/\\*.*\\*/'",
"hive": r"'/\\*.*\\*/'",
"spark": r"'/\\*.*\\*/'",
},
)
self.validate_all(
r'R"""/\*.*\*/"""',
write={
"bigquery": r"'/\\*.*\\*/'",
"duckdb": r"'/\\*.*\\*/'",
"presto": r"'/\\*.*\\*/'",
"hive": r"'/\\*.*\\*/'",
"spark": r"'/\\*.*\\*/'",
},
)
self.validate_all(
'r"""a\n"""',
write={
"bigquery": "'a\\n'",
"duckdb": "'a\n'",
},
)
self.validate_all(
'"""a\n"""',
write={
"bigquery": "'a\\n'",
"duckdb": "'a\n'",
},
)
self.validate_all(
"CAST(a AS INT64)",
write={
"bigquery": "CAST(a AS INT64)",
"duckdb": "CAST(a AS BIGINT)",
"presto": "CAST(a AS BIGINT)",
"hive": "CAST(a AS BIGINT)",
"spark": "CAST(a AS BIGINT)",
},
)
self.validate_all(
"CAST(a AS BYTES)",
write={
"bigquery": "CAST(a AS BYTES)",
"duckdb": "CAST(a AS BLOB)",
"presto": "CAST(a AS VARBINARY)",
"hive": "CAST(a AS BINARY)",
"spark": "CAST(a AS BINARY)",
},
)
self.validate_all(
"CAST(a AS NUMERIC)",
write={
"bigquery": "CAST(a AS NUMERIC)",
"duckdb": "CAST(a AS DECIMAL)",
"presto": "CAST(a AS DECIMAL)",
"hive": "CAST(a AS DECIMAL)",
"spark": "CAST(a AS DECIMAL)",
},
)
self.validate_all(
"[1, 2, 3]",
read={
"duckdb": "[1, 2, 3]",
"presto": "ARRAY[1, 2, 3]",
"hive": "ARRAY(1, 2, 3)",
"spark": "ARRAY(1, 2, 3)",
},
write={
"bigquery": "[1, 2, 3]",
"duckdb": "[1, 2, 3]",
"presto": "ARRAY[1, 2, 3]",
"hive": "ARRAY(1, 2, 3)",
"spark": "ARRAY(1, 2, 3)",
},
)
self.validate_all(
"SELECT * FROM UNNEST(['7', '14']) AS x",
read={
"spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)",
},
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS _t(x)",
"spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS _t(x)",
},
)
self.validate_all(
"SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)",
write={"bigquery": "SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)"},
)
self.validate_all(
"SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers",
write={
"bigquery": "SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers"
},
)
self.validate_all(
"SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)",
write={"bigquery": "SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)"},
)
self.validate_all(
"x IS unknown",
write={
"bigquery": "x IS NULL",
"duckdb": "x IS NULL",
"presto": "x IS NULL",
"hive": "x IS NULL",
"spark": "x IS NULL",
},
)
self.validate_all(
"x IS NOT unknown",
write={
"bigquery": "NOT x IS NULL",
"duckdb": "NOT x IS NULL",
"presto": "NOT x IS NULL",
"hive": "NOT x IS NULL",
"spark": "NOT x IS NULL",
},
)
self.validate_all(
"CURRENT_TIMESTAMP()",
read={
"tsql": "GETDATE()",
},
write={
"tsql": "GETDATE()",
},
)
self.validate_all(
"current_datetime",
write={
"bigquery": "CURRENT_DATETIME()",
"presto": "CURRENT_DATETIME()",
"hive": "CURRENT_DATETIME()",
"spark": "CURRENT_DATETIME()",
},
)
self.validate_all(
"current_time",
write={
"bigquery": "CURRENT_TIME()",
"duckdb": "CURRENT_TIME",
"presto": "CURRENT_TIME()",
"hive": "CURRENT_TIME()",
"spark": "CURRENT_TIME()",
},
)
self.validate_all(
"CURRENT_TIMESTAMP",
write={
"bigquery": "CURRENT_TIMESTAMP()",
"duckdb": "CURRENT_TIMESTAMP",
"postgres": "CURRENT_TIMESTAMP",
"presto": "CURRENT_TIMESTAMP",
"hive": "CURRENT_TIMESTAMP()",
"spark": "CURRENT_TIMESTAMP()",
},
)
self.validate_all(
"CURRENT_TIMESTAMP()",
write={
"bigquery": "CURRENT_TIMESTAMP()",
"duckdb": "CURRENT_TIMESTAMP",
"postgres": "CURRENT_TIMESTAMP",
"presto": "CURRENT_TIMESTAMP",
"hive": "CURRENT_TIMESTAMP()",
"spark": "CURRENT_TIMESTAMP()",
},
)
self.validate_all(
"DIV(x, y)",
write={
"bigquery": "DIV(x, y)",
"duckdb": "x // y",
},
)
self.validate_all(
"CREATE TABLE db.example_table (col_a struct)",
write={
"bigquery": "CREATE TABLE db.example_table (col_a STRUCT)",
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT)",
},
)
self.validate_all(
"CREATE TABLE db.example_table (col_a STRUCT>)",
write={
"bigquery": "CREATE TABLE db.example_table (col_a STRUCT>)",
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a BIGINT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT>)",
},
)
self.validate_all(
"CREATE TABLE db.example_table (x int) PARTITION BY x cluster by x",
write={
"bigquery": "CREATE TABLE db.example_table (x INT64) PARTITION BY x CLUSTER BY x",
},
)
self.validate_all(
"SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
write={
"bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
"mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))",
"hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
},
)
self.validate_all(
"DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)",
write={
"postgres": "CURRENT_DATE - INTERVAL '1 DAY'",
"bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)",
},
)
self.validate_all(
"DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)",
write={
"bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
"duckdb": "CURRENT_DATE + INTERVAL 1 DAY",
"mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
"postgres": "CURRENT_DATE + INTERVAL '1 DAY'",
"presto": "DATE_ADD('DAY', 1, CURRENT_DATE)",
"hive": "DATE_ADD(CURRENT_DATE, 1)",
"spark": "DATE_ADD(CURRENT_DATE, 1)",
},
)
self.validate_all(
"DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', DAY)",
write={
"bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), DAY)",
"mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
"starrocks": "DATE_DIFF('DAY', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
},
)
self.validate_all(
"DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), DAY)",
read={
"mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
"starrocks": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
},
)
self.validate_all(
"DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', MINUTE)",
write={
"bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), MINUTE)",
"starrocks": "DATE_DIFF('MINUTE', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
},
)
self.validate_all(
"CURRENT_DATE('UTC')",
write={
"mysql": "CURRENT_DATE AT TIME ZONE 'UTC'",
"postgres": "CURRENT_DATE AT TIME ZONE 'UTC'",
},
)
self.validate_identity(
"SELECT * FROM test QUALIFY a IS DISTINCT FROM b WINDOW c AS (PARTITION BY d)"
)
self.validate_all(
"SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10",
write={
"bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10",
"snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10",
},
)
self.validate_all(
"SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
write={
"spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)",
"bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
"snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
},
)
self.validate_all(
"SELECT cola, colb FROM (VALUES (1, 'test')) AS tab",
write={
"bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])",
},
)
self.validate_all(
"SELECT cola, colb FROM (VALUES (1, 'test'))",
write={
"bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])",
},
)
self.validate_all(
"SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])",
read={
"postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)",
},
)
self.validate_all(
"SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)",
write={
"spark": "SELECT cola, colb, colc FROM VALUES (1, 'test', NULL) AS tab(cola, colb, colc)",
"bigquery": "SELECT cola, colb, colc FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb, NULL AS colc)])",
"snowflake": "SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)",
},
)
self.validate_all(
"SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
write={
"bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
},
)
self.validate_all(
"SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table",
write={
"bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table",
"duckdb": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 1) FROM table",
},
)
self.validate_all(
"SELECT * FROM UNNEST([1]) WITH OFFSET",
write={"bigquery": "SELECT * FROM UNNEST([1]) WITH OFFSET AS offset"},
)
self.validate_all(
"SELECT * FROM UNNEST([1]) WITH OFFSET y",
write={"bigquery": "SELECT * FROM UNNEST([1]) WITH OFFSET AS y"},
)
self.validate_all(
"GENERATE_ARRAY(1, 4)",
read={"bigquery": "GENERATE_ARRAY(1, 4)"},
write={"duckdb": "GENERATE_SERIES(1, 4)"},
)
self.validate_all(
"TO_JSON_STRING(x)",
read={"bigquery": "TO_JSON_STRING(x)"},
write={
"bigquery": "TO_JSON_STRING(x)",
"duckdb": "CAST(TO_JSON(x) AS TEXT)",
"presto": "JSON_FORMAT(x)",
"spark": "TO_JSON(x)",
},
)
self.validate_all(
"""SELECT
`u`.`harness_user_email` AS `harness_user_email`,
`d`.`harness_user_id` AS `harness_user_id`,
`harness_account_id` AS `harness_account_id`
FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`harness_cluster_details`) AS `d`, UNNEST(`d`.`harness_account_ids`) AS `harness_account_id`
WHERE
NOT `harness_account_id` IS NULL""",
read={
"": """
SELECT
"u"."harness_user_email" AS "harness_user_email",
"_q_0"."d"."harness_user_id" AS "harness_user_id",
"_q_1"."harness_account_id" AS "harness_account_id"
FROM
"analytics_staging"."stg_mongodb__users" AS "u",
UNNEST("u"."harness_cluster_details") AS "_q_0"("d"),
UNNEST("_q_0"."d"."harness_account_ids") AS "_q_1"("harness_account_id")
WHERE
NOT "_q_1"."harness_account_id" IS NULL
"""
},
pretty=True,
)
self.validate_identity("LOG(n, b)")
def test_user_defined_functions(self):
self.validate_identity(
"CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 NOT DETERMINISTIC LANGUAGE js AS 'return x*y;'"
)
self.validate_identity("CREATE TEMPORARY FUNCTION udf(x ANY TYPE) AS (x)")
self.validate_identity("CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y)")
self.validate_identity(
"CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE AS SELECT s, t"
)
self.validate_identity(
'''CREATE TEMPORARY FUNCTION string_length_0(strings ARRAY) RETURNS FLOAT64 LANGUAGE js AS """'use strict'; function string_length(strings) { return _.sum(_.map(strings, ((x) => x.length))); } return string_length(strings);""" OPTIONS (library=['gs://ibis-testing-libraries/lodash.min.js'])''',
"CREATE TEMPORARY FUNCTION string_length_0(strings ARRAY) RETURNS FLOAT64 LANGUAGE js OPTIONS (library=['gs://ibis-testing-libraries/lodash.min.js']) AS '\\'use strict\\'; function string_length(strings) { return _.sum(_.map(strings, ((x) => x.length))); } return string_length(strings);'",
)
def test_group_concat(self):
self.validate_all(
"SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a",
write={"bigquery": "SELECT a, STRING_AGG(b) FROM table GROUP BY a"},
)
def test_remove_precision_parameterized_types(self):
self.validate_all(
"SELECT CAST(1 AS NUMERIC(10, 2))",
write={
"bigquery": "SELECT CAST(1 AS NUMERIC)",
},
)
self.validate_all(
"CREATE TABLE test (a NUMERIC(10, 2))",
write={
"bigquery": "CREATE TABLE test (a NUMERIC(10, 2))",
},
)
self.validate_all(
"SELECT CAST('1' AS STRING(10)) UNION ALL SELECT CAST('2' AS STRING(10))",
write={
"bigquery": "SELECT CAST('1' AS STRING) UNION ALL SELECT CAST('2' AS STRING)",
},
)
self.validate_all(
"SELECT cola FROM (SELECT CAST('1' AS STRING(10)) AS cola UNION ALL SELECT CAST('2' AS STRING(10)) AS cola)",
write={
"bigquery": "SELECT cola FROM (SELECT CAST('1' AS STRING) AS cola UNION ALL SELECT CAST('2' AS STRING) AS cola)",
},
)
self.validate_all(
"INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING(10)), CAST(14 AS STRING(10)))",
write={
"bigquery": "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING), CAST(14 AS STRING))",
},
)
def test_models(self):
self.validate_identity(
"SELECT * FROM ML.PREDICT(MODEL mydataset.mymodel, (SELECT label, column1, column2 FROM mydataset.mytable))"
)
self.validate_identity(
"SELECT label, predicted_label1, predicted_label AS predicted_label2 FROM ML.PREDICT(MODEL mydataset.mymodel2, (SELECT * EXCEPT (predicted_label), predicted_label AS predicted_label1 FROM ML.PREDICT(MODEL mydataset.mymodel1, TABLE mydataset.mytable)))"
)
self.validate_identity(
"SELECT * FROM ML.PREDICT(MODEL mydataset.mymodel, (SELECT custom_label, column1, column2 FROM mydataset.mytable), STRUCT(0.55 AS threshold))"
)
self.validate_identity(
"SELECT * FROM ML.PREDICT(MODEL `my_project`.my_dataset.my_model, (SELECT * FROM input_data))"
)
self.validate_identity(
"SELECT * FROM ML.PREDICT(MODEL my_dataset.vision_model, (SELECT uri, ML.RESIZE_IMAGE(ML.DECODE_IMAGE(data), 480, 480, FALSE) AS input FROM my_dataset.object_table))"
)
self.validate_identity(
"SELECT * FROM ML.PREDICT(MODEL my_dataset.vision_model, (SELECT uri, ML.CONVERT_COLOR_SPACE(ML.RESIZE_IMAGE(ML.DECODE_IMAGE(data), 224, 280, TRUE), 'YIQ') AS input FROM my_dataset.object_table WHERE content_type = 'image/jpeg'))"
)
self.validate_identity(
"CREATE OR REPLACE MODEL foo OPTIONS (model_type='linear_reg') AS SELECT bla FROM foo WHERE cond"
)
self.validate_identity(
"""CREATE OR REPLACE MODEL m
TRANSFORM(
ML.FEATURE_CROSS(STRUCT(f1, f2)) AS cross_f,
ML.QUANTILE_BUCKETIZE(f3) OVER () AS buckets,
label_col
)
OPTIONS (
model_type='linear_reg',
input_label_cols=['label_col']
) AS
SELECT
*
FROM t""",
pretty=True,
)
self.validate_identity(
"""CREATE MODEL project_id.mydataset.mymodel
INPUT(
f1 INT64,
f2 FLOAT64,
f3 STRING,
f4 ARRAY
)
OUTPUT(
out1 INT64,
out2 INT64
)
REMOTE WITH CONNECTION myproject.us.test_connection
OPTIONS (
ENDPOINT='https://us-central1-aiplatform.googleapis.com/v1/projects/myproject/locations/us-central1/endpoints/1234'
)""",
pretty=True,
)
def test_merge(self):
self.validate_all(
"""
MERGE dataset.Inventory T
USING dataset.NewArrivals S ON FALSE
WHEN NOT MATCHED BY TARGET AND product LIKE '%a%'
THEN DELETE
WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%'
THEN DELETE""",
write={
"bigquery": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%' THEN DELETE",
"snowflake": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED AND product LIKE '%b%' THEN DELETE",
},
)
def test_rename_table(self):
self.validate_all(
"ALTER TABLE db.t1 RENAME TO db.t2",
write={
"snowflake": "ALTER TABLE db.t1 RENAME TO db.t2",
"bigquery": "ALTER TABLE db.t1 RENAME TO t2",
},
)
@mock.patch("sqlglot.dialects.bigquery.logger")
def test_pushdown_cte_column_names(self, logger):
with self.assertRaises(UnsupportedError):
transpile(
"WITH cte(foo) AS (SELECT * FROM tbl) SELECT foo FROM cte",
read="spark",
write="bigquery",
unsupported_level=ErrorLevel.RAISE,
)
self.validate_all(
"WITH cte AS (SELECT 1 AS foo) SELECT foo FROM cte",
read={"spark": "WITH cte(foo) AS (SELECT 1) SELECT foo FROM cte"},
)
self.validate_all(
"WITH cte AS (SELECT 1 AS foo) SELECT foo FROM cte",
read={"spark": "WITH cte(foo) AS (SELECT 1 AS bar) SELECT foo FROM cte"},
)
self.validate_all(
"WITH cte AS (SELECT 1 AS bar) SELECT bar FROM cte",
read={"spark": "WITH cte AS (SELECT 1 AS bar) SELECT bar FROM cte"},
)
self.validate_all(
"WITH cte AS (SELECT 1 AS foo, 2) SELECT foo FROM cte",
read={"postgres": "WITH cte(foo) AS (SELECT 1, 2) SELECT foo FROM cte"},
)
self.validate_all(
"WITH cte AS (SELECT 1 AS foo UNION ALL SELECT 2) SELECT foo FROM cte",
read={"postgres": "WITH cte(foo) AS (SELECT 1 UNION ALL SELECT 2) SELECT foo FROM cte"},
)
def test_json_object(self):
self.validate_identity("SELECT JSON_OBJECT() AS json_data")
self.validate_identity("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_data")
self.validate_identity("SELECT JSON_OBJECT('foo', 10, 'bar', ['a', 'b']) AS json_data")
self.validate_identity("SELECT JSON_OBJECT('a', 10, 'a', 'foo') AS json_data")
self.validate_identity(
"SELECT JSON_OBJECT(['a', 'b'], [10, NULL]) AS json_data",
"SELECT JSON_OBJECT('a', 10, 'b', NULL) AS json_data",
)
self.validate_identity(
"""SELECT JSON_OBJECT(['a', 'b'], [JSON '10', JSON '"foo"']) AS json_data""",
"""SELECT JSON_OBJECT('a', PARSE_JSON('10'), 'b', PARSE_JSON('"foo"')) AS json_data""",
)
self.validate_identity(
"SELECT JSON_OBJECT(['a', 'b'], [STRUCT(10 AS id, 'Red' AS color), STRUCT(20 AS id, 'Blue' AS color)]) AS json_data",
"SELECT JSON_OBJECT('a', STRUCT(10 AS id, 'Red' AS color), 'b', STRUCT(20 AS id, 'Blue' AS color)) AS json_data",
)
self.validate_identity(
"SELECT JSON_OBJECT(['a', 'b'], [TO_JSON(10), TO_JSON(['foo', 'bar'])]) AS json_data",
"SELECT JSON_OBJECT('a', TO_JSON(10), 'b', TO_JSON(['foo', 'bar'])) AS json_data",
)
with self.assertRaises(ParseError):
transpile("SELECT JSON_OBJECT('a', 1, 'b') AS json_data", read="bigquery")