From 38e6461a8afbd7cb83709ddb998f03d40ba87755 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 23 Jan 2024 06:06:14 +0100 Subject: Merging upstream version 20.9.0. Signed-off-by: Daniel Baumann --- tests/dialects/test_bigquery.py | 168 ++++++++++++++++++++-- tests/dialects/test_clickhouse.py | 36 +++-- tests/dialects/test_databricks.py | 50 +++++-- tests/dialects/test_dialect.py | 95 +++++++++---- tests/dialects/test_doris.py | 23 +++ tests/dialects/test_duckdb.py | 140 +++++++++++++++---- tests/dialects/test_hive.py | 15 +- tests/dialects/test_mysql.py | 24 ++-- tests/dialects/test_oracle.py | 46 +++++- tests/dialects/test_postgres.py | 37 +++-- tests/dialects/test_presto.py | 36 +++-- tests/dialects/test_redshift.py | 64 +++++---- tests/dialects/test_snowflake.py | 211 ++++++++++++++++++++++++---- tests/dialects/test_spark.py | 44 +++--- tests/dialects/test_starrocks.py | 4 +- tests/dialects/test_tsql.py | 286 +++++++++++++++++++++++++------------- 16 files changed, 962 insertions(+), 317 deletions(-) (limited to 'tests/dialects') diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index f263581..48480f9 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -17,6 +17,16 @@ class TestBigQuery(Validator): maxDiff = None def test_bigquery(self): + with self.assertLogs(helper_logger) as cm: + self.validate_identity( + "SELECT * FROM t AS t(c1, c2)", + "SELECT * FROM t AS t", + ) + + self.assertEqual( + cm.output, ["WARNING:sqlglot:Named columns are not supported in table alias."] + ) + with self.assertLogs(helper_logger) as cm: self.validate_all( "SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]", @@ -76,6 +86,9 @@ class TestBigQuery(Validator): ["FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word", "END FOR"], ) + self.validate_identity("SELECT * FROM dataset.my_table TABLESAMPLE SYSTEM (10 PERCENT)") + self.validate_identity("TIME('2008-12-25 15:30:00+08')") + self.validate_identity("TIME('2008-12-25 15:30:00+08', 'America/Los_Angeles')") self.validate_identity("SELECT test.Unknown FROM test") self.validate_identity(r"SELECT '\n\r\a\v\f\t'") self.validate_identity("SELECT * FROM tbl FOR SYSTEM_TIME AS OF z") @@ -130,6 +143,7 @@ class TestBigQuery(Validator): self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1") self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t") self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t") + self.validate_identity("""SELECT JSON_EXTRACT_SCALAR('{"a": 5}', '$.a')""") self.validate_identity( "FOR record IN (SELECT word, word_count FROM bigquery-public-data.samples.shakespeare LIMIT 5) DO SELECT record.word, record.word_count" ) @@ -154,6 +168,33 @@ class TestBigQuery(Validator): self.validate_identity( "SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)", ) + self.validate_identity( + """SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')""" + ) + self.validate_identity( + "SELECT SPLIT(foo)", + "SELECT SPLIT(foo, ',')", + ) + self.validate_identity( + "SELECT 1 AS hash", + "SELECT 1 AS `hash`", + ) + self.validate_identity( + "SELECT 1 AS at", + "SELECT 1 AS `at`", + ) + self.validate_identity( + 'x <> ""', + "x <> ''", + ) + self.validate_identity( + 'x <> """"""', + "x <> ''", + ) + self.validate_identity( + "x <> ''''''", + "x <> ''", + ) self.validate_identity( "SELECT a overlaps", "SELECT a AS overlaps", @@ -179,13 +220,122 @@ class TestBigQuery(Validator): "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset", ) - self.validate_all("SELECT SPLIT(foo)", write={"bigquery": "SELECT SPLIT(foo, ',')"}) - self.validate_all("SELECT 1 AS hash", write={"bigquery": "SELECT 1 AS `hash`"}) - self.validate_all("SELECT 1 AS at", write={"bigquery": "SELECT 1 AS `at`"}) - self.validate_all('x <> ""', write={"bigquery": "x <> ''"}) - self.validate_all('x <> """"""', write={"bigquery": "x <> ''"}) - self.validate_all("x <> ''''''", write={"bigquery": "x <> ''"}) - self.validate_all("CAST(x AS DATETIME)", read={"": "x::timestamp"}) + self.validate_all( + "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))", + read={ + "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))", + }, + write={ + "bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))", + "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))", + }, + ) + self.validate_all( + "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))", + write={ + "bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))", + "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS `1`, (Q3, Q4) AS `2`))", + }, + ) + self.validate_all( + "SELECT UNIX_DATE(DATE '2008-12-25')", + write={ + "bigquery": "SELECT UNIX_DATE(CAST('2008-12-25' AS DATE))", + "duckdb": "SELECT DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), CAST('2008-12-25' AS DATE))", + }, + ) + self.validate_all( + "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", + read={ + "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONS)", + }, + write={ + "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", + "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", + "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", + "redshift": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", + "spark": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "tsql": "SELECT EOMONTH(CAST('2008-11-25' AS DATE))", + }, + ) + self.validate_all( + "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", + read={ + "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", + }, + write={ + "duckdb": UnsupportedError, + "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", + "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", + }, + ) + self.validate_all( + "CAST(x AS DATETIME)", + read={ + "": "x::timestamp", + }, + ) + self.validate_all( + "SELECT TIME(15, 30, 00)", + read={ + "duckdb": "SELECT MAKE_TIME(15, 30, 00)", + "mysql": "SELECT MAKETIME(15, 30, 00)", + "postgres": "SELECT MAKE_TIME(15, 30, 00)", + "snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)", + }, + write={ + "bigquery": "SELECT TIME(15, 30, 00)", + "duckdb": "SELECT MAKE_TIME(15, 30, 00)", + "mysql": "SELECT MAKETIME(15, 30, 00)", + "postgres": "SELECT MAKE_TIME(15, 30, 00)", + "snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)", + "tsql": "SELECT TIMEFROMPARTS(15, 30, 00, 0, 0)", + }, + ) + self.validate_all( + "SELECT TIME('2008-12-25 15:30:00')", + write={ + "bigquery": "SELECT TIME('2008-12-25 15:30:00')", + "duckdb": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", + "mysql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", + "postgres": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", + "redshift": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", + "spark": "SELECT CAST('2008-12-25 15:30:00' AS TIMESTAMP)", + "tsql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", + }, + ) + self.validate_all( + "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')", + write={ + "bigquery": "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')", + "duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')", + }, + ) + self.validate_all( + "SELECT COUNTIF(x)", + read={ + "clickhouse": "SELECT countIf(x)", + "duckdb": "SELECT COUNT_IF(x)", + }, + write={ + "bigquery": "SELECT COUNTIF(x)", + "clickhouse": "SELECT countIf(x)", + "duckdb": "SELECT COUNT_IF(x)", + }, + ) + self.validate_all( + "SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), minute)", + write={ + "bigquery": "SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), MINUTE)", + "duckdb": "SELECT DATE_DIFF('MINUTE', TO_TIMESTAMP(0), TO_TIMESTAMP(60))", + "snowflake": "SELECT TIMESTAMPDIFF(MINUTE, TO_TIMESTAMP(0), TO_TIMESTAMP(60))", + }, + ) self.validate_all( "SELECT TIMESTAMP_MICROS(x)", read={ @@ -195,7 +345,7 @@ class TestBigQuery(Validator): write={ "bigquery": "SELECT TIMESTAMP_MICROS(x)", "duckdb": "SELECT MAKE_TIMESTAMP(x)", - "snowflake": "SELECT TO_TIMESTAMP(x / 1000, 3)", + "snowflake": "SELECT TO_TIMESTAMP(x, 6)", "spark": "SELECT TIMESTAMP_MICROS(x)", }, ) @@ -349,7 +499,7 @@ class TestBigQuery(Validator): }, ) self.validate_all( - "WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))", + "WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))", read={ "spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte" }, diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index bb993b5..84903aa 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -26,6 +26,7 @@ class TestClickhouse(Validator): self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)") self.assertIsNone(expr._meta) + self.validate_identity("countIf(x, y)") self.validate_identity("x = y") self.validate_identity("x <> y") self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)") @@ -44,10 +45,10 @@ class TestClickhouse(Validator): self.validate_identity("ATTACH DATABASE DEFAULT ENGINE = ORDINARY") self.validate_identity("CAST(['hello'], 'Array(Enum8(''hello'' = 1))')") self.validate_identity("SELECT x, COUNT() FROM y GROUP BY x WITH TOTALS") - self.validate_identity("SELECT INTERVAL t.days day") + self.validate_identity("SELECT INTERVAL t.days DAY") self.validate_identity("SELECT match('abc', '([a-z]+)')") self.validate_identity("dictGet(x, 'y')") - self.validate_identity("SELECT * FROM final") + self.validate_identity("WITH final AS (SELECT 1) SELECT * FROM final") self.validate_identity("SELECT * FROM x FINAL") self.validate_identity("SELECT * FROM x AS y FINAL") self.validate_identity("'a' IN mapKeys(map('a', 1, 'b', 2))") @@ -60,6 +61,9 @@ class TestClickhouse(Validator): self.validate_identity("SELECT * FROM foo GLOBAL LEFT ANY JOIN bla") self.validate_identity("SELECT quantile(0.5)(a)") self.validate_identity("SELECT quantiles(0.5)(a) AS x FROM t") + self.validate_identity("SELECT quantilesIf(0.5)(a, a > 1) AS x FROM t") + self.validate_identity("SELECT quantileState(0.5)(a) AS x FROM t") + self.validate_identity("SELECT deltaSumMerge(a) AS x FROM t") self.validate_identity("SELECT quantiles(0.1, 0.2, 0.3)(a)") self.validate_identity("SELECT quantileTiming(0.5)(RANGE(100))") self.validate_identity("SELECT histogram(5)(a)") @@ -132,7 +136,7 @@ class TestClickhouse(Validator): }, ) self.validate_all( - "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' microsecond", + "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND", read={ "duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", "postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", @@ -175,27 +179,27 @@ class TestClickhouse(Validator): }, ) self.validate_all( - "DATE_ADD('day', 1, x)", + "DATE_ADD(DAY, 1, x)", read={ - "clickhouse": "dateAdd(day, 1, x)", - "presto": "DATE_ADD('day', 1, x)", + "clickhouse": "dateAdd(DAY, 1, x)", + "presto": "DATE_ADD('DAY', 1, x)", }, write={ - "clickhouse": "DATE_ADD('day', 1, x)", - "presto": "DATE_ADD('day', 1, x)", - "": "DATE_ADD(x, 1, 'day')", + "clickhouse": "DATE_ADD(DAY, 1, x)", + "presto": "DATE_ADD('DAY', 1, x)", + "": "DATE_ADD(x, 1, 'DAY')", }, ) self.validate_all( - "DATE_DIFF('day', a, b)", + "DATE_DIFF(DAY, a, b)", read={ - "clickhouse": "dateDiff('day', a, b)", - "presto": "DATE_DIFF('day', a, b)", + "clickhouse": "dateDiff(DAY, a, b)", + "presto": "DATE_DIFF('DAY', a, b)", }, write={ - "clickhouse": "DATE_DIFF('day', a, b)", - "presto": "DATE_DIFF('day', a, b)", - "": "DATEDIFF(b, a, day)", + "clickhouse": "DATE_DIFF(DAY, a, b)", + "presto": "DATE_DIFF('DAY', a, b)", + "": "DATEDIFF(b, a, DAY)", }, ) self.validate_all( @@ -722,3 +726,5 @@ LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) + self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") + self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py index 7c13e79..8222170 100644 --- a/tests/dialects/test_databricks.py +++ b/tests/dialects/test_databricks.py @@ -1,3 +1,5 @@ +from sqlglot import transpile +from sqlglot.errors import ParseError from tests.dialects.test_dialect import Validator @@ -28,6 +30,14 @@ class TestDatabricks(Validator): "SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))" ) + self.validate_identity( + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$" + ) + + self.validate_identity( + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$" + ) + self.validate_all( "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))", write={ @@ -42,6 +52,18 @@ class TestDatabricks(Validator): }, ) + with self.assertRaises(ParseError): + transpile( + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo$def add_one(x):\n return x+1$$", + read="databricks", + ) + + with self.assertRaises(ParseError): + transpile( + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo bar$def add_one(x):\n return x+1$foo bar$", + read="databricks", + ) + # https://docs.databricks.com/sql/language-manual/functions/colonsign.html def test_json(self): self.validate_identity("""SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""") @@ -81,77 +103,77 @@ class TestDatabricks(Validator): self.validate_all( "SELECT DATEDIFF(year, 'start', 'end')", write={ - "tsql": "SELECT DATEDIFF(year, 'start', 'end')", - "databricks": "SELECT DATEDIFF(year, 'start', 'end')", + "tsql": "SELECT DATEDIFF(YEAR, 'start', 'end')", + "databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')", }, ) self.validate_all( "SELECT DATEDIFF(microsecond, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(microsecond, 'start', 'end')", + "databricks": "SELECT DATEDIFF(MICROSECOND, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000000 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(millisecond, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(millisecond, 'start', 'end')", + "databricks": "SELECT DATEDIFF(MILLISECOND, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(second, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(second, 'start', 'end')", + "databricks": "SELECT DATEDIFF(SECOND, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(minute, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(minute, 'start', 'end')", + "databricks": "SELECT DATEDIFF(MINUTE, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 60 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(hour, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(hour, 'start', 'end')", + "databricks": "SELECT DATEDIFF(HOUR, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 3600 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(day, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(day, 'start', 'end')", + "databricks": "SELECT DATEDIFF(DAY, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 86400 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(week, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(week, 'start', 'end')", + "databricks": "SELECT DATEDIFF(WEEK, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP))) / 7 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(month, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(month, 'start', 'end')", + "databricks": "SELECT DATEDIFF(MONTH, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 12 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(quarter, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(quarter, 'start', 'end')", + "databricks": "SELECT DATEDIFF(QUARTER, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 4 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) / 3 AS BIGINT)", }, ) self.validate_all( "SELECT DATEDIFF(year, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(year, 'start', 'end')", + "databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')", "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)", }, ) @@ -160,8 +182,8 @@ class TestDatabricks(Validator): self.validate_all( "SELECT DATEADD(year, 1, '2020-01-01')", write={ - "tsql": "SELECT DATEADD(year, 1, '2020-01-01')", - "databricks": "SELECT DATEADD(year, 1, '2020-01-01')", + "tsql": "SELECT DATEADD(YEAR, 1, '2020-01-01')", + "databricks": "SELECT DATEADD(YEAR, 1, '2020-01-01')", }, ) self.validate_all( diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index a49d067..3cf4ddc 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -104,6 +104,16 @@ class TestDialect(Validator): "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'.", ) + with self.assertRaises(ValueError) as cm: + Dialect.get_or_raise("myqsl") + + self.assertEqual(str(cm.exception), "Unknown dialect 'myqsl'. Did you mean mysql?") + + with self.assertRaises(ValueError) as cm: + Dialect.get_or_raise("asdfjasodiufjsd") + + self.assertEqual(str(cm.exception), "Unknown dialect 'asdfjasodiufjsd'.") + def test_compare_dialects(self): bigquery_class = Dialect["bigquery"] bigquery_object = BigQuery() @@ -767,32 +777,32 @@ class TestDialect(Validator): "duckdb": "x + INTERVAL 1 DAY", "hive": "DATE_ADD(x, 1)", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", - "presto": "DATE_ADD('day', 1, x)", + "presto": "DATE_ADD('DAY', 1, x)", "spark": "DATE_ADD(x, 1)", "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", "doris": "DATE_ADD(x, INTERVAL 1 DAY)", }, ) self.validate_all( - "DATE_TRUNC('day', x)", + "DATE_TRUNC('DAY', x)", read={ "bigquery": "DATE_TRUNC(x, day)", "spark": "TRUNC(x, 'day')", }, write={ - "bigquery": "DATE_TRUNC(x, day)", - "duckdb": "DATE_TRUNC('day', x)", + "bigquery": "DATE_TRUNC(x, DAY)", + "duckdb": "DATE_TRUNC('DAY', x)", "mysql": "DATE(x)", - "presto": "DATE_TRUNC('day', x)", - "postgres": "DATE_TRUNC('day', x)", - "snowflake": "DATE_TRUNC('day', x)", - "starrocks": "DATE_TRUNC('day', x)", - "spark": "TRUNC(x, 'day')", - "doris": "DATE_TRUNC(x, 'day')", + "presto": "DATE_TRUNC('DAY', x)", + "postgres": "DATE_TRUNC('DAY', x)", + "snowflake": "DATE_TRUNC('DAY', x)", + "starrocks": "DATE_TRUNC('DAY', x)", + "spark": "TRUNC(x, 'DAY')", + "doris": "DATE_TRUNC(x, 'DAY')", }, ) self.validate_all( - "TIMESTAMP_TRUNC(x, day)", + "TIMESTAMP_TRUNC(x, DAY)", read={ "bigquery": "TIMESTAMP_TRUNC(x, day)", "duckdb": "DATE_TRUNC('day', x)", @@ -805,18 +815,18 @@ class TestDialect(Validator): }, ) self.validate_all( - "DATE_TRUNC('day', CAST(x AS DATE))", + "DATE_TRUNC('DAY', CAST(x AS DATE))", read={ - "presto": "DATE_TRUNC('day', x::DATE)", - "snowflake": "DATE_TRUNC('day', x::DATE)", + "presto": "DATE_TRUNC('DAY', x::DATE)", + "snowflake": "DATE_TRUNC('DAY', x::DATE)", }, ) self.validate_all( - "TIMESTAMP_TRUNC(CAST(x AS DATE), day)", + "TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)", read={"postgres": "DATE_TRUNC('day', x::DATE)"}, ) self.validate_all( - "TIMESTAMP_TRUNC(CAST(x AS DATE), day)", + "TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)", read={"starrocks": "DATE_TRUNC('day', x::DATE)"}, ) self.validate_all( @@ -850,23 +860,23 @@ class TestDialect(Validator): }, ) self.validate_all( - "DATE_TRUNC('year', x)", + "DATE_TRUNC('YEAR', x)", read={ "bigquery": "DATE_TRUNC(x, year)", "spark": "TRUNC(x, 'year')", }, write={ - "bigquery": "DATE_TRUNC(x, year)", + "bigquery": "DATE_TRUNC(x, YEAR)", "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')", - "postgres": "DATE_TRUNC('year', x)", - "snowflake": "DATE_TRUNC('year', x)", - "starrocks": "DATE_TRUNC('year', x)", - "spark": "TRUNC(x, 'year')", - "doris": "DATE_TRUNC(x, 'year')", + "postgres": "DATE_TRUNC('YEAR', x)", + "snowflake": "DATE_TRUNC('YEAR', x)", + "starrocks": "DATE_TRUNC('YEAR', x)", + "spark": "TRUNC(x, 'YEAR')", + "doris": "DATE_TRUNC(x, 'YEAR')", }, ) self.validate_all( - "TIMESTAMP_TRUNC(x, year)", + "TIMESTAMP_TRUNC(x, YEAR)", read={ "bigquery": "TIMESTAMP_TRUNC(x, year)", "postgres": "DATE_TRUNC(year, x)", @@ -875,9 +885,9 @@ class TestDialect(Validator): "starrocks": "DATE_TRUNC('year', x)", }, write={ - "bigquery": "TIMESTAMP_TRUNC(x, year)", - "spark": "DATE_TRUNC('year', x)", - "doris": "DATE_TRUNC(x, 'year')", + "bigquery": "TIMESTAMP_TRUNC(x, YEAR)", + "spark": "DATE_TRUNC('YEAR', x)", + "doris": "DATE_TRUNC(x, 'YEAR')", }, ) self.validate_all( @@ -952,7 +962,7 @@ class TestDialect(Validator): "drill": "DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL 1 DAY)", "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY", "hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", - "presto": "DATE_ADD('day', 1, CAST('2020-01-01' AS DATE))", + "presto": "DATE_ADD('DAY', 1, CAST('2020-01-01' AS DATE))", "spark": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", }, ) @@ -1003,7 +1013,7 @@ class TestDialect(Validator): ) self.validate_all( f"{unit}(TS_OR_DS_TO_DATE(x))", - read={ + write={ dialect: f"{unit}(x)" for dialect in ( "mysql", @@ -1011,7 +1021,10 @@ class TestDialect(Validator): "starrocks", ) }, - write={ + ) + self.validate_all( + f"{unit}(CAST(x AS DATE))", + read={ dialect: f"{unit}(x)" for dialect in ( "mysql", @@ -1670,6 +1683,28 @@ class TestDialect(Validator): ) def test_alias(self): + self.validate_all( + 'SELECT 1 AS "foo"', + read={ + "mysql": "SELECT 1 'foo'", + "sqlite": "SELECT 1 'foo'", + "tsql": "SELECT 1 'foo'", + }, + ) + + for dialect in ( + "presto", + "hive", + "postgres", + "clickhouse", + "bigquery", + "snowflake", + "duckdb", + ): + with self.subTest(f"string alias: {dialect}"): + with self.assertRaises(ParseError): + parse_one("SELECT 1 'foo'", dialect=dialect) + self.validate_all( "SELECT a AS b FROM x GROUP BY b", write={ diff --git a/tests/dialects/test_doris.py b/tests/dialects/test_doris.py index 9591269..5ae23ad 100644 --- a/tests/dialects/test_doris.py +++ b/tests/dialects/test_doris.py @@ -4,6 +4,29 @@ from tests.dialects.test_dialect import Validator class TestDoris(Validator): dialect = "doris" + def test_doris(self): + self.validate_all( + "SELECT TO_DATE('2020-02-02 00:00:00')", + write={ + "doris": "SELECT TO_DATE('2020-02-02 00:00:00')", + "oracle": "SELECT CAST('2020-02-02 00:00:00' AS DATE)", + }, + ) + self.validate_all( + "SELECT MAX_BY(a, b), MIN_BY(c, d)", + read={"clickhouse": "SELECT argMax(a, b), argMin(c, d)"}, + ) + self.validate_all( + "SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))", + read={ + "clickhouse": "SELECT arraySum(x -> x*x, [2, 3])", + }, + write={ + "clickhouse": "SELECT arraySum(x -> x * x, [2, 3])", + "doris": "SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))", + }, + ) + def test_identity(self): self.validate_identity("COALECSE(a, b, c, d)") self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index f915168..e5f7e0c 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -7,6 +7,41 @@ class TestDuckDB(Validator): dialect = "duckdb" def test_duckdb(self): + struct_pack = parse_one('STRUCT_PACK("a b" := 1)', read="duckdb") + self.assertIsInstance(struct_pack.expressions[0].this, exp.Identifier) + self.assertEqual(struct_pack.sql(dialect="duckdb"), "{'a b': 1}") + + self.validate_all( + "SELECT SUM(X) OVER (ORDER BY x)", + write={ + "bigquery": "SELECT SUM(X) OVER (ORDER BY x NULLS LAST)", + "duckdb": "SELECT SUM(X) OVER (ORDER BY x)", + "mysql": "SELECT SUM(X) OVER (ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x)", + }, + ) + self.validate_all( + "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", + write={ + "bigquery": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", + "duckdb": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", + "mysql": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", + }, + ) + self.validate_all( + "SELECT * FROM x ORDER BY 1 NULLS LAST", + write={ + "duckdb": "SELECT * FROM x ORDER BY 1", + "mysql": "SELECT * FROM x ORDER BY 1", + }, + ) + + self.validate_all( + "CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)", + read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"}, + ) + self.validate_identity("SELECT 1 WHERE x > $1") + self.validate_identity("SELECT 1 WHERE x > $name") + self.assertEqual( parse_one("select * from t limit (select 5)").sql(dialect="duckdb"), exp.select("*").from_("t").limit(exp.select("5").subquery()).sql(dialect="duckdb"), @@ -54,12 +89,36 @@ class TestDuckDB(Validator): }, ) + self.validate_all( + """SELECT JSON('{"fruit":"banana"}') -> 'fruit'""", + write={ + "duckdb": """SELECT JSON('{"fruit":"banana"}') -> 'fruit'""", + "snowflake": """SELECT PARSE_JSON('{"fruit":"banana"}')['fruit']""", + }, + ) + self.validate_all( + """SELECT JSON('{"fruit": {"foo": "banana"}}') -> 'fruit' -> 'foo'""", + write={ + "duckdb": """SELECT JSON('{"fruit": {"foo": "banana"}}') -> 'fruit' -> 'foo'""", + "snowflake": """SELECT PARSE_JSON('{"fruit": {"foo": "banana"}}')['fruit']['foo']""", + }, + ) + self.validate_all( + "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", + write={ + "bigquery": "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT (SELECT col.b FROM UNNEST(col) AS col WHERE col.a = 1) FROM _data", + "duckdb": "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", + }, + ) self.validate_all( "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", read={ "bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table", "duckdb": "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", }, + write={ + "bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table", + }, ) self.validate_all( "WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER (WHERE x > 1) FROM cte", @@ -86,8 +145,8 @@ class TestDuckDB(Validator): self.validate_all( "SELECT UNNEST(ARRAY[1, 2, 3]), UNNEST(ARRAY[4, 5]), UNNEST(ARRAY[6]) FROM x", write={ - "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM x, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))", - "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM x, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))", + "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM x CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))", + "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM x CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))", }, ) self.validate_all( @@ -164,6 +223,13 @@ class TestDuckDB(Validator): self.validate_all("0x1010", write={"": "0 AS x1010"}) self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"}) self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'}) + self.validate_all( + "SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))", + write={ + "duckdb": "SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))", + "tsql": "SELECT FORMAT(CAST('2020-01-01' AS DATETIME2), CONCAT('yyyy', 'MM'))", + }, + ) self.validate_all( "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))", read={ @@ -188,7 +254,7 @@ class TestDuckDB(Validator): }, ) self.validate_all( - "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))", + "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))", read={ "duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))", "hive": "DATEDIFF(a, b)", @@ -226,15 +292,15 @@ class TestDuckDB(Validator): self.validate_all( """SELECT DATEDIFF('day', t1."A", t1."B") FROM "table" AS t1""", write={ - "duckdb": """SELECT DATE_DIFF('day', t1."A", t1."B") FROM "table" AS t1""", - "trino": """SELECT DATE_DIFF('day', t1."A", t1."B") FROM "table" AS t1""", + "duckdb": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""", + "trino": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""", }, ) self.validate_all( "SELECT DATE_DIFF('day', DATE '2020-01-01', DATE '2020-01-05')", write={ - "duckdb": "SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", - "trino": "SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", + "duckdb": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", + "trino": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", }, ) self.validate_all( @@ -366,7 +432,7 @@ class TestDuckDB(Validator): }, ) self.validate_all( - "IF(y <> 0, x / y, NULL)", + "IF((y) <> 0, (x) / (y), NULL)", read={ "bigquery": "SAFE_DIVIDE(x, y)", }, @@ -374,6 +440,7 @@ class TestDuckDB(Validator): self.validate_all( "STRUCT_PACK(x := 1, y := '2')", write={ + "bigquery": "STRUCT(1 AS x, '2' AS y)", "duckdb": "{'x': 1, 'y': '2'}", "spark": "STRUCT(1 AS x, '2' AS y)", }, @@ -381,6 +448,7 @@ class TestDuckDB(Validator): self.validate_all( "STRUCT_PACK(key1 := 'value1', key2 := 42)", write={ + "bigquery": "STRUCT('value1' AS key1, 42 AS key2)", "duckdb": "{'key1': 'value1', 'key2': 42}", "spark": "STRUCT('value1' AS key1, 42 AS key2)", }, @@ -478,6 +546,8 @@ class TestDuckDB(Validator): "SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY", read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"}, ) + self.validate_identity("SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t") + self.validate_identity("SELECT PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t") self.validate_all( "SELECT QUANTILE_CONT(x, q) FROM t", write={ @@ -513,6 +583,14 @@ class TestDuckDB(Validator): self.validate_identity("SELECT ISNAN(x)") + self.validate_all( + "SELECT COUNT_IF(x)", + write={ + "duckdb": "SELECT COUNT_IF(x)", + "bigquery": "SELECT COUNTIF(x)", + }, + ) + def test_array_index(self): with self.assertLogs(helper_logger) as cm: self.validate_all( @@ -569,10 +647,10 @@ class TestDuckDB(Validator): ) self.validate_all( "SELECT INTERVAL '1 quarter'", - write={"duckdb": "SELECT (90 * INTERVAL '1' day)"}, + write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"}, ) self.validate_all( - "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) day) + (7 * INTERVAL (-5) day))) AS t1", + "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) DAY) + (7 * INTERVAL (-5) DAY))) AS t1", read={ "presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1", }, @@ -594,7 +672,7 @@ class TestDuckDB(Validator): write={ "bigquery": "TIMESTAMP_MILLIS(x)", "duckdb": "EPOCH_MS(x)", - "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / 1000)", + "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))", "spark": "TIMESTAMP_MILLIS(x)", }, ) @@ -659,36 +737,44 @@ class TestDuckDB(Validator): ) def test_sample(self): - self.validate_all( + self.validate_identity( "SELECT * FROM tbl USING SAMPLE 5", - write={"duckdb": "SELECT * FROM tbl USING SAMPLE (5)"}, + "SELECT * FROM tbl USING SAMPLE (5 ROWS)", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl USING SAMPLE 10%", - write={"duckdb": "SELECT * FROM tbl USING SAMPLE (10 PERCENT)"}, + "SELECT * FROM tbl USING SAMPLE (10 PERCENT)", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl USING SAMPLE 10 PERCENT (bernoulli)", - write={"duckdb": "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)"}, + "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl USING SAMPLE reservoir(50 ROWS) REPEATABLE (100)", - write={"duckdb": "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)"}, + "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl USING SAMPLE 10% (system, 377)", - write={"duckdb": "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)"}, + "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl TABLESAMPLE RESERVOIR(20%), tbl2 WHERE tbl.i=tbl2.i", - write={ - "duckdb": "SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i" - }, + "SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i", ) - self.validate_all( + self.validate_identity( "SELECT * FROM tbl, tbl2 WHERE tbl.i=tbl2.i USING SAMPLE RESERVOIR(20%)", + "SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)", + ) + + self.validate_all( + "SELECT * FROM example TABLESAMPLE (3 ROWS) REPEATABLE (82)", + read={ + "duckdb": "SELECT * FROM example TABLESAMPLE (3) REPEATABLE (82)", + "snowflake": "SELECT * FROM example SAMPLE (3 ROWS) SEED (82)", + }, write={ - "duckdb": "SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)" + "duckdb": "SELECT * FROM example TABLESAMPLE (3 ROWS) REPEATABLE (82)", + "snowflake": "SELECT * FROM example TABLESAMPLE (3 ROWS) SEED (82)", }, ) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index b3366a2..8b5a945 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -296,11 +296,11 @@ class TestHive(Validator): self.validate_all( "DATEDIFF(a, b)", write={ - "duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))", - "presto": "DATE_DIFF('day', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))", + "duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))", + "presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))", "hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))", "spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))", - "": "DATEDIFF(TS_OR_DS_TO_DATE(a), TS_OR_DS_TO_DATE(b))", + "": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))", }, ) self.validate_all( @@ -358,11 +358,11 @@ class TestHive(Validator): self.validate_all( "DATEDIFF(TO_DATE(y), x)", write={ - "duckdb": "DATE_DIFF('day', CAST(x AS DATE), CAST(CAST(y AS DATE) AS DATE))", - "presto": "DATE_DIFF('day', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))", + "duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), CAST(y AS DATE))", + "presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))", "hive": "DATEDIFF(TO_DATE(y), TO_DATE(x))", "spark": "DATEDIFF(TO_DATE(y), TO_DATE(x))", - "": "DATEDIFF(TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE(y)), TS_OR_DS_TO_DATE(x))", + "": "DATEDIFF(CAST(y AS DATE), CAST(x AS DATE))", }, ) self.validate_all( @@ -399,6 +399,7 @@ class TestHive(Validator): ) def test_hive(self): + self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))") self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)") self.validate_identity("SELECT * FROM my_table VERSION AS OF DATE_ADD(CURRENT_DATE, -1)") @@ -737,9 +738,11 @@ class TestHive(Validator): "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo", read={ "presto": "SELECT * FROM x AS foo TABLESAMPLE BERNOULLI (1)", + "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)", }, write={ "hive": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo", + "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)", "spark": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo", }, ) diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 19245f0..85bf261 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -533,45 +533,45 @@ class TestMySQL(Validator): self.validate_all( "SELECT DATEDIFF(x, y)", read={ - "presto": "SELECT DATE_DIFF('day', y, x)", - "redshift": "SELECT DATEDIFF(day, y, x)", + "presto": "SELECT DATE_DIFF('DAY', y, x)", + "redshift": "SELECT DATEDIFF(DAY, y, x)", }, write={ "mysql": "SELECT DATEDIFF(x, y)", - "presto": "SELECT DATE_DIFF('day', y, x)", - "redshift": "SELECT DATEDIFF(day, y, x)", + "presto": "SELECT DATE_DIFF('DAY', y, x)", + "redshift": "SELECT DATEDIFF(DAY, y, x)", }, ) self.validate_all( "DAYOFYEAR(x)", write={ "mysql": "DAYOFYEAR(x)", - "": "DAY_OF_YEAR(TS_OR_DS_TO_DATE(x))", + "": "DAY_OF_YEAR(CAST(x AS DATE))", }, ) self.validate_all( "DAYOFMONTH(x)", - write={"mysql": "DAYOFMONTH(x)", "": "DAY_OF_MONTH(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "DAYOFMONTH(x)", "": "DAY_OF_MONTH(CAST(x AS DATE))"}, ) self.validate_all( "DAYOFWEEK(x)", - write={"mysql": "DAYOFWEEK(x)", "": "DAY_OF_WEEK(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "DAYOFWEEK(x)", "": "DAY_OF_WEEK(CAST(x AS DATE))"}, ) self.validate_all( "WEEKOFYEAR(x)", - write={"mysql": "WEEKOFYEAR(x)", "": "WEEK_OF_YEAR(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "WEEKOFYEAR(x)", "": "WEEK_OF_YEAR(CAST(x AS DATE))"}, ) self.validate_all( "DAY(x)", - write={"mysql": "DAY(x)", "": "DAY(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "DAY(x)", "": "DAY(CAST(x AS DATE))"}, ) self.validate_all( "WEEK(x)", - write={"mysql": "WEEK(x)", "": "WEEK(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "WEEK(x)", "": "WEEK(CAST(x AS DATE))"}, ) self.validate_all( "YEAR(x)", - write={"mysql": "YEAR(x)", "": "YEAR(TS_OR_DS_TO_DATE(x))"}, + write={"mysql": "YEAR(x)", "": "YEAR(CAST(x AS DATE))"}, ) self.validate_all( "DATE(x)", @@ -1005,7 +1005,7 @@ COMMENT='客户账户表'""" self.validate_all( "MONTHNAME(x)", write={ - "": "TIME_TO_STR(TS_OR_DS_TO_DATE(x), '%B')", + "": "TIME_TO_STR(CAST(x AS DATE), '%B')", "mysql": "DATE_FORMAT(x, '%M')", }, ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index e9ebac1..fce714e 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -7,14 +7,13 @@ class TestOracle(Validator): dialect = "oracle" def test_oracle(self): - self.validate_identity("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol") - self.assertIsInstance( - parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol", dialect="oracle"), - exp.AlterTable, - ) - self.validate_identity( - "ALTER TABLE Payments ADD (Stock NUMBER NOT NULL, dropid VARCHAR2(500) NOT NULL)" + self.validate_identity("REGEXP_REPLACE('source', 'search')") + parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol", dialect="oracle").assert_is( + exp.AlterTable ) + + self.validate_identity("CURRENT_TIMESTAMP(precision)") + self.validate_identity("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol") self.validate_identity("ALTER TABLE Payments ADD Stock NUMBER NOT NULL") self.validate_identity("SELECT x FROM t WHERE cond FOR UPDATE") self.validate_identity("SELECT JSON_OBJECT(k1: v1 FORMAT JSON, k2: v2 FORMAT JSON)") @@ -34,7 +33,14 @@ class TestOracle(Validator): self.validate_identity("SELECT STANDARD_HASH('hello', 'MD5')") self.validate_identity("SELECT * FROM table_name@dblink_name.database_link_domain") self.validate_identity("SELECT * FROM table_name SAMPLE (25) s") + self.validate_identity("SELECT COUNT(*) * 10 FROM orders SAMPLE (10) SEED (1)") self.validate_identity("SELECT * FROM V$SESSION") + self.validate_identity( + "SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name" + ) + self.validate_identity( + "ALTER TABLE Payments ADD (Stock NUMBER NOT NULL, dropid VARCHAR2(500) NOT NULL)" + ) self.validate_identity( "SELECT JSON_ARRAYAGG(JSON_OBJECT('RNK': RNK, 'RATING_CODE': RATING_CODE, 'DATE_VALUE': DATE_VALUE, 'AGENT_ID': AGENT_ID RETURNING CLOB) RETURNING CLOB) AS JSON_DATA FROM tablename" ) @@ -50,6 +56,18 @@ class TestOracle(Validator): self.validate_identity( "SELECT MIN(column_name) KEEP (DENSE_RANK FIRST ORDER BY column_name DESC) FROM table_name" ) + self.validate_identity( + """SELECT JSON_OBJECT(KEY 'key1' IS emp.column1, KEY 'key2' IS emp.column1) "emp_key" FROM emp""", + """SELECT JSON_OBJECT('key1': emp.column1, 'key2': emp.column1) AS "emp_key" FROM emp""", + ) + self.validate_identity( + "SELECT JSON_OBJECTAGG(KEY department_name VALUE department_id) FROM dep WHERE id <= 30", + "SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30", + ) + self.validate_identity( + "SYSDATE", + "CURRENT_TIMESTAMP", + ) self.validate_identity( "SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) " 'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) ' @@ -131,6 +149,20 @@ class TestOracle(Validator): "postgres": "CAST(x AS sch.udt)", }, ) + self.validate_all( + "SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')", + write={ + "oracle": "SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')", + "duckdb": "SELECT STRPTIME('2024-12-12 12:12:12.000000', '%Y-%m-%d %H:%M:%S.%f')", + }, + ) + self.validate_all( + "SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')", + write={ + "oracle": "SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')", + "duckdb": "SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)", + }, + ) def test_join_marker(self): self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 9155696..f46eeba 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -1,4 +1,4 @@ -from sqlglot import ParseError, exp, parse_one, transpile +from sqlglot import ParseError, UnsupportedError, exp, parse_one, transpile from sqlglot.helper import logger as helper_logger from tests.dialects.test_dialect import Validator @@ -11,7 +11,7 @@ class TestPostgres(Validator): expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres") cdef = expr.find(exp.ColumnDef) cdef.args["kind"].assert_is(exp.DataType) - self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL day)") + self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)") self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)") self.validate_identity("CREATE TABLE test (elems JSONB[])") @@ -74,6 +74,12 @@ class TestPostgres(Validator): "CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])", "CREATE TABLE test (x TIMESTAMP[][])", ) + self.validate_identity( + "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp" + ) + self.validate_identity( + "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp" + ) self.validate_all( "CREATE OR REPLACE FUNCTION function_name (input_a character varying DEFAULT NULL::character varying)", @@ -158,7 +164,7 @@ class TestPostgres(Validator): write={ "hive": "SELECT EXPLODE(c) FROM t", "postgres": "SELECT UNNEST(c) FROM t", - "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM t, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(c)))) AS _u(pos) CROSS JOIN UNNEST(c) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(c) AND _u_2.pos_2 = CARDINALITY(c))", + "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM t CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(c)))) AS _u(pos) CROSS JOIN UNNEST(c) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(c) AND _u_2.pos_2 = CARDINALITY(c))", }, ) self.validate_all( @@ -262,6 +268,7 @@ class TestPostgres(Validator): self.validate_identity( "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json(a, b)" ) + self.validate_identity("SELECT * FROM t TABLESAMPLE SYSTEM (50) REPEATABLE (55)") self.validate_identity("x @@ y") self.validate_identity("CAST(x AS MONEY)") self.validate_identity("CAST(x AS INT4RANGE)") @@ -277,7 +284,7 @@ class TestPostgres(Validator): self.validate_identity("CAST(x AS DATERANGE)") self.validate_identity("CAST(x AS DATEMULTIRANGE)") self.validate_identity( - """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 day' PRECEDING AND '1 month' FOLLOWING)""" + """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)""" ) self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]") self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]") @@ -325,6 +332,13 @@ class TestPostgres(Validator): "SELECT TO_TIMESTAMP(1284352323.5), TO_TIMESTAMP('05 Dec 2000', 'DD Mon YYYY')" ) + self.validate_all( + "SELECT * FROM t TABLESAMPLE SYSTEM (50)", + write={ + "postgres": "SELECT * FROM t TABLESAMPLE SYSTEM (50)", + "redshift": UnsupportedError, + }, + ) self.validate_all( "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount)", write={ @@ -387,17 +401,17 @@ class TestPostgres(Validator): self.validate_all( "GENERATE_SERIES(a, b, ' 2 days ')", write={ - "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 days')", - "presto": "SEQUENCE(a, b, INTERVAL '2' day)", - "trino": "SEQUENCE(a, b, INTERVAL '2' day)", + "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')", + "presto": "SEQUENCE(a, b, INTERVAL '2' DAY)", + "trino": "SEQUENCE(a, b, INTERVAL '2' DAY)", }, ) self.validate_all( "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')", write={ - "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 day')", - "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)", - "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)", + "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')", + "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", + "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)", }, ) self.validate_all( @@ -657,7 +671,7 @@ class TestPostgres(Validator): ) self.validate_all( "'45 days'::interval day", - write={"postgres": "CAST('45 days' AS INTERVAL day)"}, + write={"postgres": "CAST('45 days' AS INTERVAL DAY)"}, ) self.validate_all( "'x' 'y' 'z'", @@ -726,6 +740,7 @@ class TestPostgres(Validator): """merge into x as x using (select id) as y on a = b WHEN matched then update set X."A" = y.b""", write={ "postgres": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""", + "trino": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""", "snowflake": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET X."A" = y.b""", }, ) diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 8b5080c..387b0e0 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -182,12 +182,12 @@ class TestPresto(Validator): def test_interval_plural_to_singular(self): # Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals unit_to_expected = { - "SeCoNds": "second", - "minutes": "minute", - "hours": "hour", - "days": "day", - "months": "month", - "years": "year", + "SeCoNds": "SECOND", + "minutes": "MINUTE", + "hours": "HOUR", + "days": "DAY", + "months": "MONTH", + "years": "YEAR", } for unit, expected in unit_to_expected.items(): @@ -279,10 +279,10 @@ class TestPresto(Validator): }, ) self.validate_all( - "DATE_ADD('day', 1, x)", + "DATE_ADD('DAY', 1, x)", write={ - "duckdb": "x + INTERVAL 1 day", - "presto": "DATE_ADD('day', 1, x)", + "duckdb": "x + INTERVAL 1 DAY", + "presto": "DATE_ADD('DAY', 1, x)", "hive": "DATE_ADD(x, 1)", "spark": "DATE_ADD(x, 1)", }, @@ -582,6 +582,8 @@ class TestPresto(Validator): }, ) + self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)") + self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)") self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')") self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1") self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY") @@ -600,6 +602,16 @@ class TestPresto(Validator): "SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)" ) + self.validate_all( + "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", + read={ + "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + }, + write={ + "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", + "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", + }, + ) self.validate_all( "SELECT MAX_BY(a.id, a.timestamp) FROM a", read={ @@ -707,9 +719,9 @@ class TestPresto(Validator): }, ) self.validate_all("VALUES 1, 2, 3", write={"presto": "VALUES (1), (2), (3)"}) - self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' day"}) - self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' week"}) - self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' WEEKS"}) + self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' DAY"}) + self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEK"}) + self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEKS"}) self.validate_all( "SELECT SUBSTRING(a, 1, 3), SUBSTRING(a, LENGTH(a) - (3 - 1))", read={ diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index c6be789..88fef67 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -6,6 +6,7 @@ class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): + self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_all( "LISTAGG(sellerid, ', ')", read={ @@ -55,7 +56,7 @@ class TestRedshift(Validator): }, ) self.validate_all( - "SELECT INTERVAL '5 days'", + "SELECT INTERVAL '5 DAYS'", read={ "": "SELECT INTERVAL '5' days", }, @@ -64,10 +65,10 @@ class TestRedshift(Validator): "SELECT ADD_MONTHS('2008-03-31', 1)", write={ "bigquery": "SELECT DATE_ADD(CAST('2008-03-31' AS DATETIME), INTERVAL 1 MONTH)", - "duckdb": "SELECT CAST('2008-03-31' AS TIMESTAMP) + INTERVAL 1 month", - "redshift": "SELECT DATEADD(month, 1, '2008-03-31')", - "trino": "SELECT DATE_ADD('month', 1, CAST('2008-03-31' AS TIMESTAMP))", - "tsql": "SELECT DATEADD(month, 1, CAST('2008-03-31' AS DATETIME2))", + "duckdb": "SELECT CAST('2008-03-31' AS TIMESTAMP) + INTERVAL 1 MONTH", + "redshift": "SELECT DATEADD(MONTH, 1, '2008-03-31')", + "trino": "SELECT DATE_ADD('MONTH', 1, CAST('2008-03-31' AS TIMESTAMP))", + "tsql": "SELECT DATEADD(MONTH, 1, CAST('2008-03-31' AS DATETIME2))", }, ) self.validate_all( @@ -234,45 +235,45 @@ class TestRedshift(Validator): self.validate_all( "DATEDIFF('day', a, b)", write={ - "bigquery": "DATE_DIFF(CAST(b AS DATETIME), CAST(a AS DATETIME), day)", - "duckdb": "DATE_DIFF('day', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))", + "bigquery": "DATE_DIFF(CAST(b AS DATETIME), CAST(a AS DATETIME), DAY)", + "duckdb": "DATE_DIFF('DAY', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))", "hive": "DATEDIFF(b, a)", - "redshift": "DATEDIFF(day, a, b)", - "presto": "DATE_DIFF('day', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))", + "redshift": "DATEDIFF(DAY, a, b)", + "presto": "DATE_DIFF('DAY', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATEADD(month, 18, '2008-02-28')", write={ "bigquery": "SELECT DATE_ADD(CAST('2008-02-28' AS DATETIME), INTERVAL 18 MONTH)", - "duckdb": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL 18 month", + "duckdb": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL 18 MONTH", "hive": "SELECT ADD_MONTHS('2008-02-28', 18)", "mysql": "SELECT DATE_ADD('2008-02-28', INTERVAL 18 MONTH)", - "postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 month'", - "presto": "SELECT DATE_ADD('month', 18, CAST('2008-02-28' AS TIMESTAMP))", - "redshift": "SELECT DATEADD(month, 18, '2008-02-28')", - "snowflake": "SELECT DATEADD(month, 18, CAST('2008-02-28' AS TIMESTAMPNTZ))", - "tsql": "SELECT DATEADD(month, 18, CAST('2008-02-28' AS DATETIME2))", + "postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 MONTH'", + "presto": "SELECT DATE_ADD('MONTH', 18, CAST('2008-02-28' AS TIMESTAMP))", + "redshift": "SELECT DATEADD(MONTH, 18, '2008-02-28')", + "snowflake": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS TIMESTAMPNTZ))", + "tsql": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS DATETIME2))", }, ) self.validate_all( "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')", write={ - "bigquery": "SELECT DATE_DIFF(CAST('2009-12-31' AS DATETIME), CAST('2009-01-01' AS DATETIME), week)", - "duckdb": "SELECT DATE_DIFF('week', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", + "bigquery": "SELECT DATE_DIFF(CAST('2009-12-31' AS DATETIME), CAST('2009-01-01' AS DATETIME), WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", "hive": "SELECT CAST(DATEDIFF('2009-12-31', '2009-01-01') / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST('2009-12-31' AS TIMESTAMP) - CAST('2009-01-01' AS TIMESTAMP))) / 7 AS BIGINT)", - "presto": "SELECT DATE_DIFF('week', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", - "redshift": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')", - "snowflake": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')", - "tsql": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')", + "presto": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", + "redshift": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", + "snowflake": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", + "tsql": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", }, ) def test_identity(self): - self.validate_identity("SELECT DATEADD(day, 1, 'today')") + self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") self.validate_identity("SELECT * FROM #x") - self.validate_identity("SELECT INTERVAL '5 day'") + self.validate_identity("SELECT INTERVAL '5 DAY'") self.validate_identity("foo$") self.validate_identity("CAST('bla' AS SUPER)") self.validate_identity("CREATE TABLE real1 (realcol REAL)") @@ -295,11 +296,11 @@ class TestRedshift(Validator): ) self.validate_identity( "SELECT DATE_DIFF('month', CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", - "SELECT DATEDIFF(month, CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", + "SELECT DATEDIFF(MONTH, CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", ) self.validate_identity( "SELECT * FROM x WHERE y = DATEADD('month', -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))", - "SELECT * FROM x WHERE y = DATEADD(month, -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))", + "SELECT * FROM x WHERE y = DATEADD(MONTH, -1, DATE_TRUNC('MONTH', (SELECT y FROM #temp_table)))", ) self.validate_identity( "SELECT 'a''b'", @@ -310,16 +311,16 @@ class TestRedshift(Validator): "CREATE TABLE t (c BIGINT IDENTITY(0, 1))", ) self.validate_identity( - "SELECT DATEADD(hour, 0, CAST('2020-02-02 01:03:05.124' AS TIMESTAMP))" + "SELECT DATEADD(HOUR, 0, CAST('2020-02-02 01:03:05.124' AS TIMESTAMP))" ) self.validate_identity( - "SELECT DATEDIFF(second, '2020-02-02 00:00:00.000', '2020-02-02 01:03:05.124')" + "SELECT DATEDIFF(SECOND, '2020-02-02 00:00:00.000', '2020-02-02 01:03:05.124')" ) self.validate_identity( "CREATE OR REPLACE VIEW v1 AS SELECT id, AVG(average_metric1) AS m1, AVG(average_metric2) AS m2 FROM t GROUP BY id WITH NO SCHEMA BINDING" ) self.validate_identity( - "SELECT caldate + INTERVAL '1 second' AS dateplus FROM date WHERE caldate = '12-31-2008'" + "SELECT caldate + INTERVAL '1 SECOND' AS dateplus FROM date WHERE caldate = '12-31-2008'" ) self.validate_identity( "SELECT COUNT(*) FROM event WHERE eventname LIKE '%Ring%' OR eventname LIKE '%Die%'" @@ -341,7 +342,7 @@ class TestRedshift(Validator): ) self.validate_identity( "SELECT DATEADD('day', ndays, caldate)", - "SELECT DATEADD(day, ndays, caldate)", + "SELECT DATEADD(DAY, ndays, caldate)", ) self.validate_identity( "CONVERT(INT, x)", @@ -349,8 +350,11 @@ class TestRedshift(Validator): ) self.validate_identity( "SELECT DATE_ADD('day', 1, DATE('2023-01-01'))", - "SELECT DATEADD(day, 1, DATE('2023-01-01'))", + "SELECT DATEADD(DAY, 1, DATE('2023-01-01'))", ) + + self.validate_identity("SELECT * FROM x AS a, a.b AS c, c.d.e AS f, f.g.h.i.j.k AS l") + self.validate_identity( """SELECT c_name, diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 4d8168a..0882290 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -10,6 +10,9 @@ class TestSnowflake(Validator): dialect = "snowflake" def test_snowflake(self): + self.validate_identity( + "INSERT OVERWRITE TABLE t SELECT 1", "INSERT OVERWRITE INTO t SELECT 1" + ) self.validate_identity("SELECT rename, replace") expr = parse_one("SELECT APPROX_TOP_K(C4, 3, 5) FROM t") expr.selects[0].assert_is(exp.AggFunc) @@ -36,9 +39,12 @@ WHERE )""", ) + self.validate_identity("RM @parquet_stage") + self.validate_identity("REMOVE @parquet_stage") + self.validate_identity("SELECT TIMESTAMP_FROM_PARTS(d, t)") + self.validate_identity("SELECT GET_PATH(v, 'attr[0].name') FROM vartab") self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))") self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))") - self.validate_identity("SELECT user_id, value FROM table_name sample ($s) SEED (0)") self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)") self.validate_identity("SELECT OBJECT_CONSTRUCT()") self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())") @@ -74,6 +80,49 @@ WHERE self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) + self.validate_identity( + "SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)" + ) + self.validate_identity( + """SELECT PARSE_JSON('{"x": "hello"}'):x LIKE 'hello'""", + """SELECT GET_PATH(PARSE_JSON('{"x": "hello"}'), 'x') LIKE 'hello'""", + ) + self.validate_identity( + """SELECT data:x LIKE 'hello' FROM some_table""", + """SELECT GET_PATH(data, 'x') LIKE 'hello' FROM some_table""", + ) + self.validate_identity( + "SELECT SUM({ fn CONVERT(123, SQL_DOUBLE) })", + "SELECT SUM(CAST(123 AS DOUBLE))", + ) + self.validate_identity( + "SELECT SUM({ fn CONVERT(123, SQL_VARCHAR) })", + "SELECT SUM(CAST(123 AS VARCHAR))", + ) + self.validate_identity( + "SELECT TIMESTAMPFROMPARTS(d, t)", + "SELECT TIMESTAMP_FROM_PARTS(d, t)", + ) + self.validate_identity( + "SELECT user_id, value FROM table_name SAMPLE ($s) SEED (0)", + "SELECT user_id, value FROM table_name TABLESAMPLE ($s) SEED (0)", + ) + self.validate_identity( + "SELECT v:attr[0].name FROM vartab", + "SELECT GET_PATH(v, 'attr[0].name') FROM vartab", + ) + self.validate_identity( + 'SELECT v:"fruit" FROM vartab', + """SELECT GET_PATH(v, '"fruit"') FROM vartab""", + ) + self.validate_identity( + "v:attr[0]:name", + "GET_PATH(GET_PATH(v, 'attr[0]'), 'name')", + ) + self.validate_identity( + """SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""", + """SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""", + ) self.validate_identity( "SELECT * FROM foo at", "SELECT * FROM foo AS at", @@ -146,6 +195,80 @@ WHERE "CAST(x AS VARCHAR)", ) + self.validate_all( + "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", + read={ + "bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])", + "duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + }, + write={ + "bigquery": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + "duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", + "snowflake": "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", + }, + ) + self.validate_all( + "SELECT * FROM example TABLESAMPLE (3) SEED (82)", + read={ + "databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + }, + write={ + "databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", + "snowflake": "SELECT * FROM example TABLESAMPLE (3) SEED (82)", + }, + ) + self.validate_all( + "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", + write={ + "duckdb": "SELECT MAKE_TIME(12, 34, 56 + (987654321 / 1000000000.0))", + "snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", + }, + ) + self.validate_all( + "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", + read={ + "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", + }, + write={ + "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", + "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", + }, + ) + self.validate_all( + """WITH vartab(v) AS (select parse_json('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", + write={ + "bigquery": """WITH vartab AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]') AS v) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "duckdb": """WITH vartab(v) AS (SELECT JSON('[{"attr": [{"name": "banana"}]}]')) SELECT v -> '$[0].attr[0].name' FROM vartab""", + "mysql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('[{"attr": [{"name": "banana"}]}]')) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", + "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", + "tsql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_VALUE(v, '$[0].attr[0].name') FROM vartab""", + }, + ) + self.validate_all( + """WITH vartab(v) AS (select parse_json('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", + write={ + "bigquery": """WITH vartab AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}') AS v) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "duckdb": """WITH vartab(v) AS (SELECT JSON('{"attr": [{"name": "banana"}]}')) SELECT v -> '$.attr[0].name' FROM vartab""", + "mysql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('{"attr": [{"name": "banana"}]}')) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", + "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", + "tsql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_VALUE(v, '$.attr[0].name') FROM vartab""", + }, + ) + self.validate_all( + """SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""", + write={ + "bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""", + "duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""", + "mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""", + "presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""", + "snowflake": """SELECT GET_PATH(PARSE_JSON('{"fruit":"banana"}'), 'fruit')""", + "tsql": """SELECT JSON_VALUE('{"fruit":"banana"}', '$.fruit')""", + }, + ) self.validate_all( "SELECT TO_ARRAY(['test'])", write={ @@ -162,7 +285,7 @@ WHERE ) self.validate_all( # We need to qualify the columns in this query because "value" would be ambiguous - 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t, TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', + 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', read={ "duckdb": 'WITH t(x, "value") AS (SELECT [1,2,3], 1) SELECT UNNEST(t.x) AS "value" FROM t', }, @@ -172,7 +295,7 @@ WHERE write={ "duckdb": "SELECT {'Manitoba': 'Winnipeg', 'foo': 'bar'} AS province_capital", "snowflake": "SELECT OBJECT_CONSTRUCT('Manitoba', 'Winnipeg', 'foo', 'bar') AS province_capital", - "spark": "SELECT STRUCT('Manitoba' AS Winnipeg, 'foo' AS bar) AS province_capital", + "spark": "SELECT STRUCT('Winnipeg' AS Manitoba, 'bar' AS foo) AS province_capital", }, ) self.validate_all( @@ -418,13 +541,11 @@ WHERE }, ) self.validate_all( - 'x:a:"b c"', + '''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''', write={ - "duckdb": "x['a']['b c']", - "hive": "x['a']['b c']", - "presto": "x['a']['b c']", - "snowflake": "x['a']['b c']", - "spark": "x['a']['b c']", + "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""", + "mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""", + "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '"b c"')""", }, ) self.validate_all( @@ -457,6 +578,14 @@ WHERE "spark": "SELECT TIMESTAMP_MILLIS(1659981729000)", }, ) + self.validate_all( + "SELECT TO_TIMESTAMP(16599817290000, 4)", + write={ + "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(16599817290000 / POW(10, 4) AS INT64))", + "snowflake": "SELECT TO_TIMESTAMP(16599817290000, 4)", + "spark": "SELECT TIMESTAMP_SECONDS(16599817290000 / POW(10, 4))", + }, + ) self.validate_all( "SELECT TO_TIMESTAMP('1659981729')", write={ @@ -467,11 +596,11 @@ WHERE self.validate_all( "SELECT TO_TIMESTAMP(1659981729000000000, 9)", write={ - "bigquery": "SELECT TIMESTAMP_MICROS(CAST(1659981729000000000 / 1000 AS INT64))", - "duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / 1000000000)", - "presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / 1000000000)", + "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(1659981729000000000 / POW(10, 9) AS INT64))", + "duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / POW(10, 9))", + "presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / POW(10, 9))", "snowflake": "SELECT TO_TIMESTAMP(1659981729000000000, 9)", - "spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / 1000000000)", + "spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / POW(10, 9))", }, ) self.validate_all( @@ -660,7 +789,7 @@ WHERE ) self.validate_identity( "SELECT parse_json($1):a.b FROM @mystage2/data1.json.gz", - "SELECT PARSE_JSON($1)['a'].b FROM @mystage2/data1.json.gz", + "SELECT GET_PATH(PARSE_JSON($1), 'a.b') FROM @mystage2/data1.json.gz", ) self.validate_identity( "SELECT * FROM @mystage t (c1)", @@ -676,15 +805,24 @@ WHERE self.validate_identity("SELECT * FROM testtable TABLESAMPLE (100)") self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)") self.validate_identity("SELECT * FROM testtable TABLESAMPLE (10 ROWS)") - self.validate_identity("SELECT * FROM testtable SAMPLE (10)") - self.validate_identity("SELECT * FROM testtable SAMPLE ROW (0)") - self.validate_identity("SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)") self.validate_identity( "SELECT i, j FROM table1 AS t1 INNER JOIN table2 AS t2 TABLESAMPLE (50) WHERE t2.j = t1.i" ) self.validate_identity( "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)" ) + self.validate_identity( + "SELECT * FROM testtable SAMPLE (10)", + "SELECT * FROM testtable TABLESAMPLE (10)", + ) + self.validate_identity( + "SELECT * FROM testtable SAMPLE ROW (0)", + "SELECT * FROM testtable TABLESAMPLE ROW (0)", + ) + self.validate_identity( + "SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)", + "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)", + ) self.validate_all( """ @@ -695,20 +833,20 @@ WHERE table2 AS t2 SAMPLE (50) -- 50% of rows in table2 WHERE t2.j = t1.i""", write={ - "snowflake": "SELECT i, j FROM table1 AS t1 SAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 SAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i", + "snowflake": "SELECT i, j FROM table1 AS t1 TABLESAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 TABLESAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i", }, ) self.validate_all( "SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)", write={ - "snowflake": "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)", + "snowflake": "SELECT * FROM testtable TABLESAMPLE BLOCK (0.012) SEED (99992)", }, ) self.validate_all( "SELECT * FROM (SELECT * FROM t1 join t2 on t1.a = t2.c) SAMPLE (1)", write={ - "snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1)", - "spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1 PERCENT)", + "snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)", + "spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1 PERCENT)", }, ) @@ -791,6 +929,11 @@ WHERE "DATEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))", ) + self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") + self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") + self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)") + self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)") + def test_semi_structured_types(self): self.validate_identity("SELECT CAST(a AS VARIANT)") self.validate_identity("SELECT CAST(a AS ARRAY)") @@ -876,7 +1019,7 @@ WHERE location=@s2/logs/ partition_type = user_specified file_format = (type = parquet)""", - "CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL1'] AS DATE)), col2 VARCHAR AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL2'] AS VARCHAR)), col3 DECIMAL AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL3'] AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)", + "CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL1') AS DATE)), col2 VARCHAR AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL2') AS VARCHAR)), col3 DECIMAL AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL3') AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)", ) self.validate_identity("CREATE OR REPLACE VIEW foo (uid) COPY GRANTS AS (SELECT 1)") self.validate_identity("CREATE TABLE geospatial_table (id INT, g GEOGRAPHY)") @@ -1092,9 +1235,9 @@ FROM cs.telescope.dag_report, TABLE(FLATTEN(input => SPLIT(operators, ','))) AS "snowflake": """SELECT id AS "ID", f.value AS "Contact", - f1.value['type'] AS "Type", - f1.value['content'] AS "Details" -FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => f.value['business']) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", + GET_PATH(f1.value, 'type') AS "Type", + GET_PATH(f1.value, 'content') AS "Details" +FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => GET_PATH(f.value, 'business')) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", }, pretty=True, ) @@ -1306,10 +1449,9 @@ MATCH_RECOGNIZE ( def test_show(self): # Parsed as Command - self.validate_identity("SHOW COLUMNS IN TABLE dt_test") self.validate_identity("SHOW TABLES LIKE 'line%' IN tpch.public") - ast = parse_one("SHOW TABLES HISTORY IN tpch.public") + ast = parse_one("SHOW TABLES HISTORY IN tpch.public", read="snowflake") self.assertIsInstance(ast, exp.Command) # Parsed as Show @@ -1331,9 +1473,22 @@ MATCH_RECOGNIZE ( ast = parse_one('SHOW PRIMARY KEYS IN "TEST"."PUBLIC"."customers"', read="snowflake") table = ast.find(exp.Table) - self.assertIsNotNone(table) self.assertEqual(table.sql(dialect="snowflake"), '"TEST"."PUBLIC"."customers"') + self.validate_identity("SHOW COLUMNS") + self.validate_identity("SHOW COLUMNS IN TABLE dt_test") + self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN TABLE dt_test") + self.validate_identity("SHOW COLUMNS IN VIEW") + self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN VIEW dt_test") + + ast = parse_one("SHOW COLUMNS LIKE '_testing%' IN dt_test", read="snowflake") + table = ast.find(exp.Table) + literal = ast.find(exp.Literal) + + self.assertEqual(table.sql(dialect="snowflake"), "dt_test") + + self.assertEqual(literal.sql(dialect="snowflake"), "'_testing%'") + def test_swap(self): ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake") assert isinstance(ast, exp.AlterTable) diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 24f68f8..56a573a 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -237,6 +237,10 @@ TBLPROPERTIES ( "REFRESH TABLE t", ) + self.validate_identity("DESCRIBE EXTENDED db.table") + self.validate_identity("SELECT * FROM test TABLESAMPLE (50 PERCENT)") + self.validate_identity("SELECT * FROM test TABLESAMPLE (5 ROWS)") + self.validate_identity("SELECT * FROM test TABLESAMPLE (BUCKET 4 OUT OF 10)") self.validate_identity("REFRESH 'hdfs://path/to/table'") self.validate_identity("REFRESH TABLE tempDB.view1") self.validate_identity("SELECT CASE WHEN a = NULL THEN 1 ELSE 2 END") @@ -244,13 +248,21 @@ TBLPROPERTIES ( self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)") self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)") self.validate_identity("REFRESH TABLE a.b.c") - self.validate_identity("INTERVAL -86 days") + self.validate_identity("INTERVAL -86 DAYS") self.validate_identity("SELECT UNIX_TIMESTAMP()") self.validate_identity("TRIM(' SparkSQL ')") self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") + self.validate_identity( + "SELECT INTERVAL '5' HOURS '30' MINUTES '5' SECONDS '6' MILLISECONDS '7' MICROSECONDS", + "SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS", + ) + self.validate_identity( + "SELECT INTERVAL 5 HOURS 30 MINUTES 5 SECONDS 6 MILLISECONDS 7 MICROSECONDS", + "SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS", + ) self.validate_identity( "SELECT REGEXP_REPLACE('100-200', r'([^0-9])', '')", "SELECT REGEXP_REPLACE('100-200', '([^0-9])', '')", @@ -299,25 +311,25 @@ TBLPROPERTIES ( }, ) self.validate_all( - "SELECT DATEDIFF(month, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))", + "SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))", read={ "duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))", }, write={ - "spark": "SELECT DATEDIFF(month, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))", + "spark": "SELECT DATEDIFF(MONTH, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))", "spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)), TO_DATE(CAST('1996-10-30' AS TIMESTAMP))) AS INT)", }, ) self.validate_all( "SELECT DATEDIFF(week, '2020-01-01', '2020-12-31')", write={ - "bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), week)", - "duckdb": "SELECT DATE_DIFF('week', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", + "bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", "hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)", - "redshift": "SELECT DATEDIFF(week, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", - "snowflake": "SELECT DATEDIFF(week, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", - "spark": "SELECT DATEDIFF(week, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", + "redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", + "snowflake": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", + "spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", }, ) self.validate_all( @@ -624,23 +636,23 @@ TBLPROPERTIES ( self.validate_all( "SELECT EXPLODE(x) FROM tbl", write={ - "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))", - "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))", + "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))", + "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))", "spark": "SELECT EXPLODE(x) FROM tbl", }, ) self.validate_all( "SELECT EXPLODE(col) FROM _u", write={ - "bigquery": "SELECT IF(pos = pos_2, col_2, NULL) AS col_2 FROM _u, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(col)) - 1)) AS pos CROSS JOIN UNNEST(col) AS col_2 WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(col) - 1) AND pos_2 = (ARRAY_LENGTH(col) - 1))", - "presto": "SELECT IF(_u_2.pos = _u_3.pos_2, _u_3.col_2) AS col_2 FROM _u, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u_2(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_3(col_2, pos_2) WHERE _u_2.pos = _u_3.pos_2 OR (_u_2.pos > CARDINALITY(col) AND _u_3.pos_2 = CARDINALITY(col))", + "bigquery": "SELECT IF(pos = pos_2, col_2, NULL) AS col_2 FROM _u CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(col)) - 1)) AS pos CROSS JOIN UNNEST(col) AS col_2 WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(col) - 1) AND pos_2 = (ARRAY_LENGTH(col) - 1))", + "presto": "SELECT IF(_u_2.pos = _u_3.pos_2, _u_3.col_2) AS col_2 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u_2(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_3(col_2, pos_2) WHERE _u_2.pos = _u_3.pos_2 OR (_u_2.pos > CARDINALITY(col) AND _u_3.pos_2 = CARDINALITY(col))", "spark": "SELECT EXPLODE(col) FROM _u", }, ) self.validate_all( "SELECT EXPLODE(col) AS exploded FROM schema.tbl", write={ - "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.exploded) AS exploded FROM schema.tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_2(exploded, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(col) AND _u_2.pos_2 = CARDINALITY(col))", + "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.exploded) AS exploded FROM schema.tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_2(exploded, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(col) AND _u_2.pos_2 = CARDINALITY(col))", }, ) self.validate_all( @@ -666,13 +678,13 @@ TBLPROPERTIES ( self.validate_all( "SELECT POSEXPLODE(ARRAY(2, 3)), EXPLODE(ARRAY(4, 5, 6)) FROM tbl", write={ - "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2, IF(pos = pos_3, col_2, NULL) AS col_2 FROM tbl, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3]), ARRAY_LENGTH([4, 5, 6])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5, 6]) AS col_2 WITH OFFSET AS pos_3 WHERE (pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5, 6]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5, 6]) - 1)))", - "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS pos_2, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2 FROM tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3]), CARDINALITY(ARRAY[4, 5, 6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5, 6]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE (_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5, 6]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5, 6])))", + "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2, IF(pos = pos_3, col_2, NULL) AS col_2 FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3]), ARRAY_LENGTH([4, 5, 6])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5, 6]) AS col_2 WITH OFFSET AS pos_3 WHERE (pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5, 6]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5, 6]) - 1)))", + "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS pos_2, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2 FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3]), CARDINALITY(ARRAY[4, 5, 6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5, 6]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE (_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5, 6]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5, 6])))", }, ) self.validate_all( "SELECT col, pos, POSEXPLODE(ARRAY(2, 3)) FROM _u", write={ - "presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))", + "presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))", }, ) diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py index 21a89d7..67aabb9 100644 --- a/tests/dialects/test_starrocks.py +++ b/tests/dialects/test_starrocks.py @@ -11,10 +11,10 @@ class TestStarrocks(Validator): def test_time(self): self.validate_identity("TIMESTAMP('2022-01-01')") self.validate_identity( - "SELECT DATE_DIFF('second', '2010-11-30 23:59:59', '2010-11-30 20:58:59')" + "SELECT DATE_DIFF('SECOND', '2010-11-30 23:59:59', '2010-11-30 20:58:59')" ) self.validate_identity( - "SELECT DATE_DIFF('minute', '2010-11-30 23:59:59', '2010-11-30 20:58:59')" + "SELECT DATE_DIFF('MINUTE', '2010-11-30 23:59:59', '2010-11-30 20:58:59')" ) def test_regex(self): diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index a2569ab..7cf9971 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -6,10 +6,17 @@ class TestTSQL(Validator): dialect = "tsql" def test_tsql(self): + self.validate_identity("ROUND(x, 1, 0)") + self.validate_identity("EXEC MyProc @id=7, @name='Lochristi'") # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN # tsql allows .. which means use the default schema self.validate_identity("SELECT * FROM a..b") + self.validate_identity("SELECT TRIM(' test ') AS Result") + self.validate_identity("SELECT TRIM('.,! ' FROM ' # test .') AS Result") + self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT)") + self.validate_identity("SELECT * FROM t TABLESAMPLE (20 ROWS)") + self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT) REPEATABLE (123)") self.validate_identity("SELECT CONCAT(column1, column2)") self.validate_identity("SELECT TestSpecialChar.Test# FROM TestSpecialChar") self.validate_identity("SELECT TestSpecialChar.Test@ FROM TestSpecialChar") @@ -20,6 +27,40 @@ class TestTSQL(Validator): self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)") self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0") + self.validate_all( + "SELECT TRIM(BOTH 'a' FROM a)", + read={ + "mysql": "SELECT TRIM(BOTH 'a' FROM a)", + }, + write={ + "mysql": "SELECT TRIM(BOTH 'a' FROM a)", + "tsql": "SELECT TRIM(BOTH 'a' FROM a)", + }, + ) + self.validate_all( + "SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)", + read={ + "duckdb": "SELECT MAKE_TIME(23, 59, 59)", + "mysql": "SELECT MAKETIME(23, 59, 59)", + "postgres": "SELECT MAKE_TIME(23, 59, 59)", + "snowflake": "SELECT TIME_FROM_PARTS(23, 59, 59)", + }, + write={ + "tsql": "SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)", + }, + ) + self.validate_all( + "SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)", + read={ + # The nanoseconds are ignored since T-SQL doesn't support that precision + "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 987654321)" + }, + write={ + "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00 + (0 / 1000.0))", + "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 0 * 1000000)", + "tsql": "SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)", + }, + ) self.validate_all( "SELECT TOP 1 * FROM (SELECT x FROM t1 UNION ALL SELECT x FROM t2) AS _l_0", read={ @@ -32,6 +73,33 @@ class TestTSQL(Validator): "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t", }, ) + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp", + write={ + "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", + "postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp", + "oracle": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) temp", + }, + ) + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT * INTO TEMP UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp", + write={ + "duckdb": "CREATE TEMPORARY TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", + "postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO TEMPORARY foo FROM (SELECT c AS c FROM t) AS temp", + }, + ) + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp", + write={ + "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", + }, + ) + self.validate_all( + "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp", + write={ + "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", + }, + ) self.validate_all( "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y", read={ @@ -104,7 +172,8 @@ class TestTSQL(Validator): ) self.validate_identity( - 'CREATE TABLE x (CONSTRAINT "pk_mytable" UNIQUE NONCLUSTERED (a DESC)) ON b (c)' + 'CREATE TABLE x (CONSTRAINT "pk_mytable" UNIQUE NONCLUSTERED (a DESC)) ON b (c)', + "CREATE TABLE x (CONSTRAINT [pk_mytable] UNIQUE NONCLUSTERED (a DESC)) ON b (c)", ) self.validate_all( @@ -117,7 +186,7 @@ class TestTSQL(Validator): ) ON [SECONDARY] """, write={ - "tsql": 'CREATE TABLE x ("zip_cd" VARCHAR(5) NULL NOT FOR REPLICATION, "zip_cd_mkey" VARCHAR(5) NOT NULL, CONSTRAINT "pk_mytable" PRIMARY KEY CLUSTERED ("zip_cd_mkey" ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON "INDEX") ON "SECONDARY"', + "tsql": "CREATE TABLE x ([zip_cd] VARCHAR(5) NULL NOT FOR REPLICATION, [zip_cd_mkey] VARCHAR(5) NOT NULL, CONSTRAINT [pk_mytable] PRIMARY KEY CLUSTERED ([zip_cd_mkey] ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON [INDEX]) ON [SECONDARY]", "spark2": "CREATE TABLE x (`zip_cd` VARCHAR(5), `zip_cd_mkey` VARCHAR(5) NOT NULL, CONSTRAINT `pk_mytable` PRIMARY KEY (`zip_cd_mkey`))", }, ) @@ -136,7 +205,8 @@ class TestTSQL(Validator): ) self.validate_identity( - "CREATE TABLE [db].[tbl]([a] [int])", 'CREATE TABLE "db"."tbl" ("a" INTEGER)' + "CREATE TABLE [db].[tbl]([a] [int])", + "CREATE TABLE [db].[tbl] ([a] INTEGER)", ) projection = parse_one("SELECT a = 1", read="tsql").selects[0] @@ -172,7 +242,6 @@ class TestTSQL(Validator): self.validate_identity("PRINT @TestVariable") self.validate_identity("SELECT Employee_ID, Department_ID FROM @MyTableVar") self.validate_identity("INSERT INTO @TestTable VALUES (1, 'Value1', 12, 20)") - self.validate_identity('SELECT "x"."y" FROM foo') self.validate_identity("SELECT * FROM #foo") self.validate_identity("SELECT * FROM ##foo") self.validate_identity("SELECT a = 1", "SELECT 1 AS a") @@ -185,11 +254,15 @@ class TestTSQL(Validator): self.validate_identity( "SELECT DISTINCT DepartmentName, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BaseRate) OVER (PARTITION BY DepartmentName) AS MedianCont FROM dbo.DimEmployee" ) + self.validate_identity( + 'SELECT "x"."y" FROM foo', + "SELECT [x].[y] FROM foo", + ) self.validate_all( "SELECT CAST([a].[b] AS SMALLINT) FROM foo", write={ - "tsql": 'SELECT CAST("a"."b" AS SMALLINT) FROM foo', + "tsql": "SELECT CAST([a].[b] AS SMALLINT) FROM foo", "spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo", }, ) @@ -227,13 +300,6 @@ class TestTSQL(Validator): "postgres": "STRING_AGG(x, '|')", }, ) - self.validate_all( - "SELECT CAST([a].[b] AS SMALLINT) FROM foo", - write={ - "tsql": 'SELECT CAST("a"."b" AS SMALLINT) FROM foo', - "spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo", - }, - ) self.validate_all( "HASHBYTES('SHA1', x)", read={ @@ -583,7 +649,11 @@ class TestTSQL(Validator): 'CREATE TABLE "dbo"."benchmark" (' '"name" CHAR(7) NOT NULL, ' '"internal_id" VARCHAR(10) NOT NULL, ' - f'UNIQUE {clusterd_keyword} ("internal_id" ASC))' + f'UNIQUE {clusterd_keyword} ("internal_id" ASC))', + "CREATE TABLE [dbo].[benchmark] (" + "[name] CHAR(7) NOT NULL, " + "[internal_id] VARCHAR(10) NOT NULL, " + f"UNIQUE {clusterd_keyword} ([internal_id] ASC))", ) self.validate_identity( @@ -802,13 +872,13 @@ WHERE """ expected_sqls = [ - 'CREATE PROCEDURE "TRANSF"."SP_Merge_Sales_Real" @Loadid INTEGER, @NumberOfRows INTEGER AS BEGIN SET XACT_ABORT ON', + "CREATE PROCEDURE [TRANSF].[SP_Merge_Sales_Real] @Loadid INTEGER, @NumberOfRows INTEGER AS BEGIN SET XACT_ABORT ON", "DECLARE @DWH_DateCreated DATETIME = CONVERT(DATETIME, getdate(), 104)", "DECLARE @DWH_DateModified DATETIME = CONVERT(DATETIME, getdate(), 104)", "DECLARE @DWH_IdUserCreated INTEGER = SUSER_ID (SYSTEM_USER)", "DECLARE @DWH_IdUserModified INTEGER = SUSER_ID (SYSTEM_USER)", "DECLARE @SalesAmountBefore float", - 'SELECT @SalesAmountBefore = SUM(SalesAmount) FROM TRANSF."Pre_Merge_Sales_Real" AS S', + "SELECT @SalesAmountBefore = SUM(SalesAmount) FROM TRANSF.[Pre_Merge_Sales_Real] AS S", "END", ] @@ -827,9 +897,9 @@ WHERE """ expected_sqls = [ - 'CREATE PROC "dbo"."transform_proc" AS DECLARE @CurrentDate VARCHAR(20)', + "CREATE PROC [dbo].[transform_proc] AS DECLARE @CurrentDate VARCHAR(20)", "SET @CurrentDate = CAST(FORMAT(GETDATE(), 'yyyy-MM-dd HH:mm:ss') AS VARCHAR(20))", - 'CREATE TABLE "target_schema"."target_table" (a INTEGER) WITH (DISTRIBUTION=REPLICATE, HEAP)', + "CREATE TABLE [target_schema].[target_table] (a INTEGER) WITH (DISTRIBUTION=REPLICATE, HEAP)", ] for expr, expected_sql in zip(parse(sql, read="tsql"), expected_sqls): @@ -901,7 +971,7 @@ WHERE def test_datepart(self): self.validate_identity( "DATEPART(QUARTER, x)", - "DATEPART(quarter, CAST(x AS DATETIME2))", + "DATEPART(QUARTER, CAST(x AS DATETIME2))", ) self.validate_identity( "DATEPART(YEAR, x)", @@ -909,15 +979,15 @@ WHERE ) self.validate_identity( "DATEPART(HOUR, date_and_time)", - "DATEPART(hour, CAST(date_and_time AS DATETIME2))", + "DATEPART(HOUR, CAST(date_and_time AS DATETIME2))", ) self.validate_identity( "DATEPART(WEEKDAY, date_and_time)", - "DATEPART(dw, CAST(date_and_time AS DATETIME2))", + "DATEPART(DW, CAST(date_and_time AS DATETIME2))", ) self.validate_identity( "DATEPART(DW, date_and_time)", - "DATEPART(dw, CAST(date_and_time AS DATETIME2))", + "DATEPART(DW, CAST(date_and_time AS DATETIME2))", ) self.validate_all( @@ -929,9 +999,9 @@ WHERE }, ) self.validate_all( - "SELECT DATEPART(year, CAST('2017-01-01' AS DATE))", + "SELECT DATEPART(YEAR, CAST('2017-01-01' AS DATE))", read={ - "postgres": "SELECT DATE_PART('year', '2017-01-01'::DATE)", + "postgres": "SELECT DATE_PART('YEAR', '2017-01-01'::DATE)", }, write={ "postgres": "SELECT TO_CHAR(CAST(CAST('2017-01-01' AS DATE) AS TIMESTAMP), 'YYYY')", @@ -1135,7 +1205,7 @@ WHERE ) def test_add_date(self): - self.validate_identity("SELECT DATEADD(year, 1, '2017/08/25')") + self.validate_identity("SELECT DATEADD(YEAR, 1, '2017/08/25')") self.validate_all( "DATEADD(year, 50, '2006-07-31')", @@ -1153,52 +1223,52 @@ WHERE "SELECT DATEADD(wk, 1, '2017/08/25')", write={ "spark": "SELECT DATE_ADD('2017/08/25', 7)", - "databricks": "SELECT DATEADD(week, 1, '2017/08/25')", + "databricks": "SELECT DATEADD(WEEK, 1, '2017/08/25')", }, ) def test_date_diff(self): - self.validate_identity("SELECT DATEDIFF(hour, 1.5, '2021-01-01')") + self.validate_identity("SELECT DATEDIFF(HOUR, 1.5, '2021-01-01')") self.validate_all( "SELECT DATEDIFF(quarter, 0, '2021-01-01')", write={ - "tsql": "SELECT DATEDIFF(quarter, CAST('1900-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", - "spark": "SELECT DATEDIFF(quarter, CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", - "duckdb": "SELECT DATE_DIFF('quarter', CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", + "tsql": "SELECT DATEDIFF(QUARTER, CAST('1900-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", + "spark": "SELECT DATEDIFF(QUARTER, CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", + "duckdb": "SELECT DATE_DIFF('QUARTER', CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATEDIFF(day, 1, '2021-01-01')", write={ - "tsql": "SELECT DATEDIFF(day, CAST('1900-01-02' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", - "spark": "SELECT DATEDIFF(day, CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", - "duckdb": "SELECT DATE_DIFF('day', CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", + "tsql": "SELECT DATEDIFF(DAY, CAST('1900-01-02' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", + "spark": "SELECT DATEDIFF(DAY, CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", + "duckdb": "SELECT DATE_DIFF('DAY', CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATEDIFF(year, '2020-01-01', '2021-01-01')", write={ - "tsql": "SELECT DATEDIFF(year, CAST('2020-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", - "spark": "SELECT DATEDIFF(year, CAST('2020-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", + "tsql": "SELECT DATEDIFF(YEAR, CAST('2020-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))", + "spark": "SELECT DATEDIFF(YEAR, CAST('2020-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))", "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('2021-01-01' AS TIMESTAMP), CAST('2020-01-01' AS TIMESTAMP)) / 12 AS INT)", }, ) self.validate_all( "SELECT DATEDIFF(mm, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(month, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", + "databricks": "SELECT DATEDIFF(MONTH, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) AS INT)", - "tsql": "SELECT DATEDIFF(month, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))", + "tsql": "SELECT DATEDIFF(MONTH, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))", }, ) self.validate_all( "SELECT DATEDIFF(quarter, 'start', 'end')", write={ - "databricks": "SELECT DATEDIFF(quarter, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", - "spark": "SELECT DATEDIFF(quarter, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", + "databricks": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", + "spark": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))", "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) / 3 AS INT)", - "tsql": "SELECT DATEDIFF(quarter, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))", + "tsql": "SELECT DATEDIFF(QUARTER, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))", }, ) @@ -1220,39 +1290,45 @@ WHERE self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)", write={ - "spark": "SELECT x.a, x.b, t.v, t.y FROM x, LATERAL (SELECT v, y FROM t) AS t(v, y)", + "spark": "SELECT x.a, x.b, t.v, t.y FROM x INNER JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)", + "tsql": "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) AS t(v, y)", }, ) self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y)", write={ "spark": "SELECT x.a, x.b, t.v, t.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)", + "tsql": "SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y)", }, ) self.validate_all( "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y) OUTER APPLY (SELECT v, y FROM t) s(v, y) LEFT JOIN z ON z.id = s.id", write={ "spark": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) LEFT JOIN LATERAL (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id", + "tsql": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y) OUTER APPLY (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id", }, ) def test_lateral_table_valued_function(self): self.validate_all( - "SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x)y(z)", + "SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x) y(z)", write={ - "spark": "SELECT t.x, y.z FROM x, LATERAL TVFTEST(t.x) AS y(z)", + "spark": "SELECT t.x, y.z FROM x INNER JOIN LATERAL TVFTEST(t.x) AS y(z)", + "tsql": "SELECT t.x, y.z FROM x CROSS APPLY TVFTEST(t.x) AS y(z)", }, ) self.validate_all( "SELECT t.x, y.z FROM x OUTER APPLY tvfTest(t.x)y(z)", write={ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL TVFTEST(t.x) AS y(z)", + "tsql": "SELECT t.x, y.z FROM x OUTER APPLY TVFTEST(t.x) AS y(z)", }, ) self.validate_all( "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)", write={ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)", + "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)", }, ) @@ -1284,27 +1360,45 @@ WHERE self.validate_all( "SELECT FORMAT(1000000.01,'###,###.###')", - write={"spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')"}, + write={ + "spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')", + "tsql": "SELECT FORMAT(1000000.01, '###,###.###')", + }, ) self.validate_all( "SELECT FORMAT(1234567, 'f')", - write={"spark": "SELECT FORMAT_NUMBER(1234567, 'f')"}, + write={ + "spark": "SELECT FORMAT_NUMBER(1234567, 'f')", + "tsql": "SELECT FORMAT(1234567, 'f')", + }, ) self.validate_all( "SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')", - write={"spark": "SELECT DATE_FORMAT('01-01-1991', 'dd.mm.yyyy')"}, + write={ + "spark": "SELECT DATE_FORMAT('01-01-1991', 'dd.mm.yyyy')", + "tsql": "SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')", + }, ) self.validate_all( "SELECT FORMAT(date_col, 'dd.mm.yyyy')", - write={"spark": "SELECT DATE_FORMAT(date_col, 'dd.mm.yyyy')"}, + write={ + "spark": "SELECT DATE_FORMAT(date_col, 'dd.mm.yyyy')", + "tsql": "SELECT FORMAT(date_col, 'dd.mm.yyyy')", + }, ) self.validate_all( "SELECT FORMAT(date_col, 'm')", - write={"spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')"}, + write={ + "spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')", + "tsql": "SELECT FORMAT(date_col, 'MMMM d')", + }, ) self.validate_all( "SELECT FORMAT(num_col, 'c')", - write={"spark": "SELECT FORMAT_NUMBER(num_col, 'c')"}, + write={ + "spark": "SELECT FORMAT_NUMBER(num_col, 'c')", + "tsql": "SELECT FORMAT(num_col, 'c')", + }, ) def test_string(self): @@ -1324,11 +1418,36 @@ WHERE def test_eomonth(self): self.validate_all( "EOMONTH(GETDATE())", - write={"spark": "LAST_DAY(CURRENT_TIMESTAMP())"}, + read={ + "spark": "LAST_DAY(CURRENT_TIMESTAMP())", + }, + write={ + "bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))", + "mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))", + "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", + "presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))", + "redshift": "LAST_DAY(CAST(SYSDATE AS DATE))", + "snowflake": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", + "tsql": "EOMONTH(CAST(GETDATE() AS DATE))", + }, ) self.validate_all( "EOMONTH(GETDATE(), -1)", - write={"spark": "LAST_DAY(ADD_MONTHS(CURRENT_TIMESTAMP(), -1))"}, + write={ + "bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))", + "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)", + "mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))", + "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", + "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))", + "redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(SYSDATE AS DATE)))", + "snowflake": "LAST_DAY(DATEADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))", + "tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", + }, ) def test_identifier_prefixes(self): @@ -1384,61 +1503,32 @@ WHERE def test_temporal_table(self): self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON)""" + """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON)""", + "CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON)", ) self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START HIDDEN NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END HIDDEN NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=ON))""" + """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START HIDDEN NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END HIDDEN NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))""" ) self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=ON))""" + """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))""" ) self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=OFF))""" + """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=OFF))""" ) self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history"))""" + """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))""" ) self.validate_identity( - """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history"))""" + """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))""" ) def test_system_time(self): - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo'", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME AS OF 'foo'""", - }, - ) - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo' AS alias", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME AS OF 'foo' AS alias""", - }, - ) - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME FROM c TO d", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME FROM c TO d""", - }, - ) - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME BETWEEN c AND d", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME BETWEEN c AND d""", - }, - ) - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME CONTAINED IN (c, d)", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME CONTAINED IN (c, d)""", - }, - ) - self.validate_all( - "SELECT [x] FROM [a].[b] FOR SYSTEM_TIME ALL AS alias", - write={ - "tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME ALL AS alias""", - }, - ) + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo'") + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo' AS alias") + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME FROM c TO d") + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME BETWEEN c AND d") + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME CONTAINED IN (c, d)") + self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME ALL AS alias") def test_current_user(self): self.validate_all( @@ -1491,7 +1581,7 @@ WHERE self.validate_all( """SELECT [key], value FROM OPENJSON(@json,'$.path.to."sub-object"')""", write={ - "tsql": """SELECT "key", value FROM OPENJSON(@json, '$.path.to."sub-object"')""", + "tsql": """SELECT [key], value FROM OPENJSON(@json, '$.path.to."sub-object"')""", }, ) self.validate_all( @@ -1520,7 +1610,7 @@ FROM OPENJSON(@json) WITH ( Date DATETIME2 '$.Order.Date', Customer VARCHAR(200) '$.AccountNumber', Quantity INTEGER '$.Item.Quantity', - "Order" VARCHAR(MAX) AS JSON + [Order] VARCHAR(MAX) AS JSON )""" }, pretty=True, @@ -1547,19 +1637,19 @@ FROM OPENJSON(@json) WITH ( def test_qualify_derived_table_outputs(self): self.validate_identity( "WITH t AS (SELECT 1) SELECT * FROM t", - 'WITH t AS (SELECT 1 AS "1") SELECT * FROM t', + "WITH t AS (SELECT 1 AS [1]) SELECT * FROM t", ) self.validate_identity( 'WITH t AS (SELECT "c") SELECT * FROM t', - 'WITH t AS (SELECT "c" AS "c") SELECT * FROM t', + "WITH t AS (SELECT [c] AS [c]) SELECT * FROM t", ) self.validate_identity( "SELECT * FROM (SELECT 1) AS subq", - 'SELECT * FROM (SELECT 1 AS "1") AS subq', + "SELECT * FROM (SELECT 1 AS [1]) AS subq", ) self.validate_identity( 'SELECT * FROM (SELECT "c") AS subq', - 'SELECT * FROM (SELECT "c" AS "c") AS subq', + "SELECT * FROM (SELECT [c] AS [c]) AS subq", ) self.validate_all( -- cgit v1.2.3