diff options
Diffstat (limited to 'tests/dialects')
-rw-r--r-- | tests/dialects/__init__.py | 0 | ||||
-rw-r--r-- | tests/dialects/test_bigquery.py | 238 | ||||
-rw-r--r-- | tests/dialects/test_clickhouse.py | 25 | ||||
-rw-r--r-- | tests/dialects/test_dialect.py | 981 | ||||
-rw-r--r-- | tests/dialects/test_duckdb.py | 249 | ||||
-rw-r--r-- | tests/dialects/test_hive.py | 541 | ||||
-rw-r--r-- | tests/dialects/test_mysql.py | 79 | ||||
-rw-r--r-- | tests/dialects/test_postgres.py | 93 | ||||
-rw-r--r-- | tests/dialects/test_presto.py | 422 | ||||
-rw-r--r-- | tests/dialects/test_snowflake.py | 145 | ||||
-rw-r--r-- | tests/dialects/test_spark.py | 226 | ||||
-rw-r--r-- | tests/dialects/test_sqlite.py | 72 | ||||
-rw-r--r-- | tests/dialects/test_starrocks.py | 8 | ||||
-rw-r--r-- | tests/dialects/test_tableau.py | 62 |
14 files changed, 3141 insertions, 0 deletions
diff --git a/tests/dialects/__init__.py b/tests/dialects/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/dialects/__init__.py diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py new file mode 100644 index 0000000..1337c3d --- /dev/null +++ b/tests/dialects/test_bigquery.py @@ -0,0 +1,238 @@ +from sqlglot import ErrorLevel, ParseError, UnsupportedError, transpile +from tests.dialects.test_dialect import Validator + + +class TestBigQuery(Validator): + dialect = "bigquery" + + def test_bigquery(self): + self.validate_all( + '"""x"""', + write={ + "bigquery": "'x'", + "duckdb": "'x'", + "presto": "'x'", + "hive": "'x'", + "spark": "'x'", + }, + ) + self.validate_all( + '"""x\'"""', + write={ + "bigquery": "'x\\''", + "duckdb": "'x'''", + "presto": "'x'''", + "hive": "'x\\''", + "spark": "'x\\''", + }, + ) + self.validate_all( + r'r"""/\*.*\*/"""', + write={ + "bigquery": r"'/\\*.*\\*/'", + "duckdb": r"'/\*.*\*/'", + "presto": r"'/\*.*\*/'", + "hive": r"'/\\*.*\\*/'", + "spark": r"'/\\*.*\\*/'", + }, + ) + self.validate_all( + R'R"""/\*.*\*/"""', + write={ + "bigquery": R"'/\\*.*\\*/'", + "duckdb": R"'/\*.*\*/'", + "presto": R"'/\*.*\*/'", + "hive": R"'/\\*.*\\*/'", + "spark": R"'/\\*.*\\*/'", + }, + ) + self.validate_all( + "CAST(a AS INT64)", + write={ + "bigquery": "CAST(a AS INT64)", + "duckdb": "CAST(a AS BIGINT)", + "presto": "CAST(a AS BIGINT)", + "hive": "CAST(a AS BIGINT)", + "spark": "CAST(a AS LONG)", + }, + ) + self.validate_all( + "CAST(a AS NUMERIC)", + write={ + "bigquery": "CAST(a AS NUMERIC)", + "duckdb": "CAST(a AS DECIMAL)", + "presto": "CAST(a AS DECIMAL)", + "hive": "CAST(a AS DECIMAL)", + "spark": "CAST(a AS DECIMAL)", + }, + ) + self.validate_all( + "[1, 2, 3]", + read={ + "duckdb": "LIST_VALUE(1, 2, 3)", + "presto": "ARRAY[1, 2, 3]", + "hive": "ARRAY(1, 2, 3)", + "spark": "ARRAY(1, 2, 3)", + }, + write={ + "bigquery": "[1, 2, 3]", + "duckdb": "LIST_VALUE(1, 2, 3)", + "presto": "ARRAY[1, 2, 3]", + "hive": "ARRAY(1, 2, 3)", + "spark": "ARRAY(1, 2, 3)", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(['7', '14']) AS x", + read={ + "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)", + }, + write={ + "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x", + "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS (x)", + "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)", + "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)", + }, + ) + + self.validate_all( + "x IS unknown", + write={ + "bigquery": "x IS NULL", + "duckdb": "x IS NULL", + "presto": "x IS NULL", + "hive": "x IS NULL", + "spark": "x IS NULL", + }, + ) + self.validate_all( + "current_datetime", + write={ + "bigquery": "CURRENT_DATETIME()", + "duckdb": "CURRENT_DATETIME()", + "presto": "CURRENT_DATETIME()", + "hive": "CURRENT_DATETIME()", + "spark": "CURRENT_DATETIME()", + }, + ) + self.validate_all( + "current_time", + write={ + "bigquery": "CURRENT_TIME()", + "duckdb": "CURRENT_TIME()", + "presto": "CURRENT_TIME()", + "hive": "CURRENT_TIME()", + "spark": "CURRENT_TIME()", + }, + ) + self.validate_all( + "current_timestamp", + write={ + "bigquery": "CURRENT_TIMESTAMP()", + "duckdb": "CURRENT_TIMESTAMP()", + "postgres": "CURRENT_TIMESTAMP", + "presto": "CURRENT_TIMESTAMP()", + "hive": "CURRENT_TIMESTAMP()", + "spark": "CURRENT_TIMESTAMP()", + }, + ) + self.validate_all( + "current_timestamp()", + write={ + "bigquery": "CURRENT_TIMESTAMP()", + "duckdb": "CURRENT_TIMESTAMP()", + "postgres": "CURRENT_TIMESTAMP", + "presto": "CURRENT_TIMESTAMP()", + "hive": "CURRENT_TIMESTAMP()", + "spark": "CURRENT_TIMESTAMP()", + }, + ) + + self.validate_identity( + "SELECT ROW() OVER (y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM x WINDOW y AS (PARTITION BY CATEGORY)" + ) + + self.validate_identity( + "SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)", + ) + + self.validate_all( + "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)", + write={ + "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRING>)", + "duckdb": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b TEXT>)", + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", + }, + ) + self.validate_all( + "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + write={ + "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a BIGINT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: LONG, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", + }, + ) + self.validate_all( + "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", + write={ + "bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", + "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", + "presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))", + "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", + "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))", + }, + ) + + # Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators + with self.assertRaises(UnsupportedError): + transpile( + "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + write="bigquery", + unsupported_level=ErrorLevel.RAISE, + ) + + with self.assertRaises(UnsupportedError): + transpile( + "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + write="bigquery", + unsupported_level=ErrorLevel.RAISE, + ) + + with self.assertRaises(ParseError): + transpile("SELECT * FROM UNNEST(x) AS x(y)", read="bigquery") + + self.validate_all( + "DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)", + write={ + "postgres": "CURRENT_DATE - INTERVAL '1' DAY", + }, + ) + self.validate_all( + "DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)", + write={ + "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)", + "duckdb": "CURRENT_DATE + INTERVAL 1 DAY", + "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)", + "postgres": "CURRENT_DATE + INTERVAL '1' DAY", + "presto": "DATE_ADD(DAY, 1, CURRENT_DATE)", + "hive": "DATE_ADD(CURRENT_DATE, 1)", + "spark": "DATE_ADD(CURRENT_DATE, 1)", + }, + ) + self.validate_all( + "CURRENT_DATE('UTC')", + write={ + "mysql": "CURRENT_DATE AT TIME ZONE 'UTC'", + "postgres": "CURRENT_DATE AT TIME ZONE 'UTC'", + }, + ) + self.validate_all( + "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", + write={ + "bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", + "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10", + }, + ) diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py new file mode 100644 index 0000000..e5b1516 --- /dev/null +++ b/tests/dialects/test_clickhouse.py @@ -0,0 +1,25 @@ +from tests.dialects.test_dialect import Validator + + +class TestClickhouse(Validator): + dialect = "clickhouse" + + def test_clickhouse(self): + self.validate_identity("dictGet(x, 'y')") + self.validate_identity("SELECT * FROM x FINAL") + self.validate_identity("SELECT * FROM x AS y FINAL") + + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + }, + ) + + self.validate_all( + "CAST(1 AS NULLABLE(Int64))", + write={ + "clickhouse": "CAST(1 AS Nullable(BIGINT))", + }, + ) diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py new file mode 100644 index 0000000..3993565 --- /dev/null +++ b/tests/dialects/test_dialect.py @@ -0,0 +1,981 @@ +import unittest + +from sqlglot import ( + Dialect, + Dialects, + ErrorLevel, + UnsupportedError, + parse_one, + transpile, +) + + +class Validator(unittest.TestCase): + dialect = None + + def validate(self, sql, target, **kwargs): + self.assertEqual(transpile(sql, **kwargs)[0], target) + + def validate_identity(self, sql): + self.assertEqual(transpile(sql, read=self.dialect, write=self.dialect)[0], sql) + + def validate_all(self, sql, read=None, write=None, pretty=False): + """ + Validate that: + 1. Everything in `read` transpiles to `sql` + 2. `sql` transpiles to everything in `write` + + Args: + sql (str): Main SQL expression + dialect (str): dialect of `sql` + read (dict): Mapping of dialect -> SQL + write (dict): Mapping of dialect -> SQL + """ + expression = parse_one(sql, read=self.dialect) + + for read_dialect, read_sql in (read or {}).items(): + with self.subTest(f"{read_dialect} -> {sql}"): + self.assertEqual( + parse_one(read_sql, read_dialect).sql( + self.dialect, unsupported_level=ErrorLevel.IGNORE + ), + sql, + ) + + for write_dialect, write_sql in (write or {}).items(): + with self.subTest(f"{sql} -> {write_dialect}"): + if write_sql is UnsupportedError: + with self.assertRaises(UnsupportedError): + expression.sql( + write_dialect, unsupported_level=ErrorLevel.RAISE + ) + else: + self.assertEqual( + expression.sql( + write_dialect, + unsupported_level=ErrorLevel.IGNORE, + pretty=pretty, + ), + write_sql, + ) + + +class TestDialect(Validator): + maxDiff = None + + def test_enum(self): + for dialect in Dialects: + self.assertIsNotNone(Dialect[dialect]) + self.assertIsNotNone(Dialect.get(dialect)) + self.assertIsNotNone(Dialect.get_or_raise(dialect)) + self.assertIsNotNone(Dialect[dialect.value]) + + def test_cast(self): + self.validate_all( + "CAST(a AS TEXT)", + write={ + "bigquery": "CAST(a AS STRING)", + "clickhouse": "CAST(a AS TEXT)", + "duckdb": "CAST(a AS TEXT)", + "mysql": "CAST(a AS TEXT)", + "hive": "CAST(a AS STRING)", + "oracle": "CAST(a AS CLOB)", + "postgres": "CAST(a AS TEXT)", + "presto": "CAST(a AS VARCHAR)", + "snowflake": "CAST(a AS TEXT)", + "spark": "CAST(a AS STRING)", + "starrocks": "CAST(a AS STRING)", + }, + ) + self.validate_all( + "CAST(a AS STRING)", + write={ + "bigquery": "CAST(a AS STRING)", + "duckdb": "CAST(a AS TEXT)", + "mysql": "CAST(a AS TEXT)", + "hive": "CAST(a AS STRING)", + "oracle": "CAST(a AS CLOB)", + "postgres": "CAST(a AS TEXT)", + "presto": "CAST(a AS VARCHAR)", + "snowflake": "CAST(a AS TEXT)", + "spark": "CAST(a AS STRING)", + "starrocks": "CAST(a AS STRING)", + }, + ) + self.validate_all( + "CAST(a AS VARCHAR)", + write={ + "bigquery": "CAST(a AS STRING)", + "duckdb": "CAST(a AS TEXT)", + "mysql": "CAST(a AS VARCHAR)", + "hive": "CAST(a AS STRING)", + "oracle": "CAST(a AS VARCHAR2)", + "postgres": "CAST(a AS VARCHAR)", + "presto": "CAST(a AS VARCHAR)", + "snowflake": "CAST(a AS VARCHAR)", + "spark": "CAST(a AS STRING)", + "starrocks": "CAST(a AS VARCHAR)", + }, + ) + self.validate_all( + "CAST(a AS VARCHAR(3))", + write={ + "bigquery": "CAST(a AS STRING(3))", + "duckdb": "CAST(a AS TEXT(3))", + "mysql": "CAST(a AS VARCHAR(3))", + "hive": "CAST(a AS VARCHAR(3))", + "oracle": "CAST(a AS VARCHAR2(3))", + "postgres": "CAST(a AS VARCHAR(3))", + "presto": "CAST(a AS VARCHAR(3))", + "snowflake": "CAST(a AS VARCHAR(3))", + "spark": "CAST(a AS VARCHAR(3))", + "starrocks": "CAST(a AS VARCHAR(3))", + }, + ) + self.validate_all( + "CAST(a AS SMALLINT)", + write={ + "bigquery": "CAST(a AS INT64)", + "duckdb": "CAST(a AS SMALLINT)", + "mysql": "CAST(a AS SMALLINT)", + "hive": "CAST(a AS SMALLINT)", + "oracle": "CAST(a AS NUMBER)", + "postgres": "CAST(a AS SMALLINT)", + "presto": "CAST(a AS SMALLINT)", + "snowflake": "CAST(a AS SMALLINT)", + "spark": "CAST(a AS SHORT)", + "sqlite": "CAST(a AS INTEGER)", + "starrocks": "CAST(a AS SMALLINT)", + }, + ) + self.validate_all( + "CAST(a AS DOUBLE)", + write={ + "bigquery": "CAST(a AS FLOAT64)", + "clickhouse": "CAST(a AS DOUBLE)", + "duckdb": "CAST(a AS DOUBLE)", + "mysql": "CAST(a AS DOUBLE)", + "hive": "CAST(a AS DOUBLE)", + "oracle": "CAST(a AS DOUBLE PRECISION)", + "postgres": "CAST(a AS DOUBLE PRECISION)", + "presto": "CAST(a AS DOUBLE)", + "snowflake": "CAST(a AS DOUBLE)", + "spark": "CAST(a AS DOUBLE)", + "starrocks": "CAST(a AS DOUBLE)", + }, + ) + self.validate_all( + "CAST(a AS TIMESTAMP)", write={"starrocks": "CAST(a AS DATETIME)"} + ) + self.validate_all( + "CAST(a AS TIMESTAMPTZ)", write={"starrocks": "CAST(a AS DATETIME)"} + ) + self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS NUMBER)"}) + self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS NUMBER)"}) + self.validate_all("CAST(a AS BIGINT)", write={"oracle": "CAST(a AS NUMBER)"}) + self.validate_all("CAST(a AS INT)", write={"oracle": "CAST(a AS NUMBER)"}) + self.validate_all( + "CAST(a AS DECIMAL)", + read={"oracle": "CAST(a AS NUMBER)"}, + write={"oracle": "CAST(a AS NUMBER)"}, + ) + + def test_time(self): + self.validate_all( + "STR_TO_TIME(x, '%Y-%m-%dT%H:%M:%S')", + read={ + "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", + }, + write={ + "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", + "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", + "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", + "presto": "DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S')", + "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", + }, + ) + self.validate_all( + "STR_TO_TIME('2020-01-01', '%Y-%m-%d')", + write={ + "duckdb": "STRPTIME('2020-01-01', '%Y-%m-%d')", + "hive": "CAST('2020-01-01' AS TIMESTAMP)", + "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d')", + "spark": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", + }, + ) + self.validate_all( + "STR_TO_TIME(x, '%y')", + write={ + "duckdb": "STRPTIME(x, '%y')", + "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy')) AS TIMESTAMP)", + "presto": "DATE_PARSE(x, '%y')", + "spark": "TO_TIMESTAMP(x, 'yy')", + }, + ) + self.validate_all( + "STR_TO_UNIX('2020-01-01', '%Y-%M-%d')", + write={ + "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%M-%d'))", + "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')", + "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%i-%d'))", + }, + ) + self.validate_all( + "TIME_STR_TO_DATE('2020-01-01')", + write={ + "duckdb": "CAST('2020-01-01' AS DATE)", + "hive": "TO_DATE('2020-01-01')", + "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')", + }, + ) + self.validate_all( + "TIME_STR_TO_TIME('2020-01-01')", + write={ + "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", + "hive": "CAST('2020-01-01' AS TIMESTAMP)", + "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')", + }, + ) + self.validate_all( + "TIME_STR_TO_UNIX('2020-01-01')", + write={ + "duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))", + "hive": "UNIX_TIMESTAMP('2020-01-01')", + "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%S'))", + }, + ) + self.validate_all( + "TIME_TO_STR(x, '%Y-%m-%d')", + write={ + "duckdb": "STRFTIME(x, '%Y-%m-%d')", + "hive": "DATE_FORMAT(x, 'yyyy-MM-dd')", + "presto": "DATE_FORMAT(x, '%Y-%m-%d')", + }, + ) + self.validate_all( + "TIME_TO_TIME_STR(x)", + write={ + "duckdb": "CAST(x AS TEXT)", + "hive": "CAST(x AS STRING)", + "presto": "CAST(x AS VARCHAR)", + }, + ) + self.validate_all( + "TIME_TO_UNIX(x)", + write={ + "duckdb": "EPOCH(x)", + "hive": "UNIX_TIMESTAMP(x)", + "presto": "TO_UNIXTIME(x)", + }, + ) + self.validate_all( + "TS_OR_DS_TO_DATE_STR(x)", + write={ + "duckdb": "SUBSTRING(CAST(x AS TEXT), 1, 10)", + "hive": "SUBSTRING(CAST(x AS STRING), 1, 10)", + "presto": "SUBSTRING(CAST(x AS VARCHAR), 1, 10)", + }, + ) + self.validate_all( + "TS_OR_DS_TO_DATE(x)", + write={ + "duckdb": "CAST(x AS DATE)", + "hive": "TO_DATE(x)", + "presto": "CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE)", + }, + ) + self.validate_all( + "TS_OR_DS_TO_DATE(x, '%-d')", + write={ + "duckdb": "CAST(STRPTIME(x, '%-d') AS DATE)", + "hive": "TO_DATE(x, 'd')", + "presto": "CAST(DATE_PARSE(x, '%e') AS DATE)", + "spark": "TO_DATE(x, 'd')", + }, + ) + self.validate_all( + "UNIX_TO_STR(x, y)", + write={ + "duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), y)", + "hive": "FROM_UNIXTIME(x, y)", + "presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)", + }, + ) + self.validate_all( + "UNIX_TO_TIME(x)", + write={ + "duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))", + "hive": "FROM_UNIXTIME(x)", + "presto": "FROM_UNIXTIME(x)", + }, + ) + self.validate_all( + "UNIX_TO_TIME_STR(x)", + write={ + "duckdb": "CAST(TO_TIMESTAMP(CAST(x AS BIGINT)) AS TEXT)", + "hive": "FROM_UNIXTIME(x)", + "presto": "CAST(FROM_UNIXTIME(x) AS VARCHAR)", + }, + ) + self.validate_all( + "DATE_TO_DATE_STR(x)", + write={ + "duckdb": "CAST(x AS TEXT)", + "hive": "CAST(x AS STRING)", + "presto": "CAST(x AS VARCHAR)", + }, + ) + self.validate_all( + "DATE_TO_DI(x)", + write={ + "duckdb": "CAST(STRFTIME(x, '%Y%m%d') AS INT)", + "hive": "CAST(DATE_FORMAT(x, 'yyyyMMdd') AS INT)", + "presto": "CAST(DATE_FORMAT(x, '%Y%m%d') AS INT)", + }, + ) + self.validate_all( + "DI_TO_DATE(x)", + write={ + "duckdb": "CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)", + "hive": "TO_DATE(CAST(x AS STRING), 'yyyyMMdd')", + "presto": "CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)", + }, + ) + self.validate_all( + "TS_OR_DI_TO_DI(x)", + write={ + "duckdb": "CAST(SUBSTR(REPLACE(CAST(x AS TEXT), '-', ''), 1, 8) AS INT)", + "hive": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)", + "presto": "CAST(SUBSTR(REPLACE(CAST(x AS VARCHAR), '-', ''), 1, 8) AS INT)", + "spark": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)", + }, + ) + self.validate_all( + "DATE_ADD(x, 1, 'day')", + read={ + "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", + "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", + }, + write={ + "bigquery": "DATE_ADD(x, INTERVAL 1 'day')", + "duckdb": "x + INTERVAL 1 day", + "hive": "DATE_ADD(x, 1)", + "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", + "postgres": "x + INTERVAL '1' 'day'", + "presto": "DATE_ADD('day', 1, x)", + "spark": "DATE_ADD(x, 1)", + "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", + }, + ) + self.validate_all( + "DATE_ADD(x, y, 'day')", + write={ + "postgres": UnsupportedError, + }, + ) + self.validate_all( + "DATE_ADD(x, 1)", + write={ + "bigquery": "DATE_ADD(x, INTERVAL 1 'day')", + "duckdb": "x + INTERVAL 1 DAY", + "hive": "DATE_ADD(x, 1)", + "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", + "presto": "DATE_ADD('day', 1, x)", + "spark": "DATE_ADD(x, 1)", + "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'day')", + write={ + "mysql": "DATE(x)", + "starrocks": "DATE(x)", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'week')", + write={ + "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')", + "starrocks": "STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'month')", + write={ + "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')", + "starrocks": "STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'quarter')", + write={ + "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')", + "starrocks": "STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'year')", + write={ + "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')", + "starrocks": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')", + }, + ) + self.validate_all( + "DATE_TRUNC(x, 'millenium')", + write={ + "mysql": UnsupportedError, + "starrocks": UnsupportedError, + }, + ) + self.validate_all( + "STR_TO_DATE(x, '%Y-%m-%dT%H:%M:%S')", + read={ + "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", + "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", + }, + write={ + "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", + "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", + "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS DATE)", + "presto": "CAST(DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S') AS DATE)", + "spark": "TO_DATE(x, 'yyyy-MM-ddTHH:mm:ss')", + }, + ) + self.validate_all( + "STR_TO_DATE(x, '%Y-%m-%d')", + write={ + "mysql": "STR_TO_DATE(x, '%Y-%m-%d')", + "starrocks": "STR_TO_DATE(x, '%Y-%m-%d')", + "hive": "CAST(x AS DATE)", + "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)", + "spark": "TO_DATE(x)", + }, + ) + self.validate_all( + "DATE_STR_TO_DATE(x)", + write={ + "duckdb": "CAST(x AS DATE)", + "hive": "TO_DATE(x)", + "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)", + "spark": "TO_DATE(x)", + }, + ) + self.validate_all( + "TS_OR_DS_ADD('2021-02-01', 1, 'DAY')", + write={ + "duckdb": "CAST('2021-02-01' AS DATE) + INTERVAL 1 DAY", + "hive": "DATE_ADD('2021-02-01', 1)", + "presto": "DATE_ADD('DAY', 1, DATE_PARSE(SUBSTR('2021-02-01', 1, 10), '%Y-%m-%d'))", + "spark": "DATE_ADD('2021-02-01', 1)", + }, + ) + self.validate_all( + "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", + write={ + "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY", + "hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", + "presto": "DATE_ADD('day', 1, CAST('2020-01-01' AS DATE))", + "spark": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", + }, + ) + + for unit in ("DAY", "MONTH", "YEAR"): + self.validate_all( + f"{unit}(x)", + read={ + dialect: f"{unit}(x)" + for dialect in ( + "bigquery", + "duckdb", + "mysql", + "presto", + "starrocks", + ) + }, + write={ + dialect: f"{unit}(x)" + for dialect in ( + "bigquery", + "duckdb", + "mysql", + "presto", + "hive", + "spark", + "starrocks", + ) + }, + ) + + def test_array(self): + self.validate_all( + "ARRAY(0, 1, 2)", + write={ + "bigquery": "[0, 1, 2]", + "duckdb": "LIST_VALUE(0, 1, 2)", + "presto": "ARRAY[0, 1, 2]", + "spark": "ARRAY(0, 1, 2)", + }, + ) + self.validate_all( + "ARRAY_SIZE(x)", + write={ + "bigquery": "ARRAY_LENGTH(x)", + "duckdb": "ARRAY_LENGTH(x)", + "presto": "CARDINALITY(x)", + "spark": "SIZE(x)", + }, + ) + self.validate_all( + "ARRAY_SUM(ARRAY(1, 2))", + write={ + "trino": "REDUCE(ARRAY[1, 2], 0, (acc, x) -> acc + x, acc -> acc)", + "duckdb": "LIST_SUM(LIST_VALUE(1, 2))", + "hive": "ARRAY_SUM(ARRAY(1, 2))", + "presto": "ARRAY_SUM(ARRAY[1, 2])", + "spark": "AGGREGATE(ARRAY(1, 2), 0, (acc, x) -> acc + x, acc -> acc)", + }, + ) + self.validate_all( + "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", + write={ + "trino": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", + "duckdb": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", + "hive": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", + "presto": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", + "spark": "AGGREGATE(x, 0, (acc, x) -> acc + x, acc -> acc)", + }, + ) + + def test_order_by(self): + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", + "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + }, + ) + + def test_json(self): + self.validate_all( + "JSON_EXTRACT(x, 'y')", + read={ + "postgres": "x->'y'", + "presto": "JSON_EXTRACT(x, 'y')", + }, + write={ + "postgres": "x->'y'", + "presto": "JSON_EXTRACT(x, 'y')", + }, + ) + self.validate_all( + "JSON_EXTRACT_SCALAR(x, 'y')", + read={ + "postgres": "x->>'y'", + "presto": "JSON_EXTRACT_SCALAR(x, 'y')", + }, + write={ + "postgres": "x->>'y'", + "presto": "JSON_EXTRACT_SCALAR(x, 'y')", + }, + ) + self.validate_all( + "JSONB_EXTRACT(x, 'y')", + read={ + "postgres": "x#>'y'", + }, + write={ + "postgres": "x#>'y'", + }, + ) + self.validate_all( + "JSONB_EXTRACT_SCALAR(x, 'y')", + read={ + "postgres": "x#>>'y'", + }, + write={ + "postgres": "x#>>'y'", + }, + ) + + def test_cross_join(self): + self.validate_all( + "SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)", + "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a", + }, + ) + self.validate_all( + "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t (a, b)", + write={ + "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)", + "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b", + }, + ) + self.validate_all( + "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t (a)", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)", + "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a", + }, + ) + + def test_set_operators(self): + self.validate_all( + "SELECT * FROM a UNION SELECT * FROM b", + read={ + "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a UNION SELECT * FROM b", + "presto": "SELECT * FROM a UNION SELECT * FROM b", + "spark": "SELECT * FROM a UNION SELECT * FROM b", + }, + write={ + "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a UNION SELECT * FROM b", + "presto": "SELECT * FROM a UNION SELECT * FROM b", + "spark": "SELECT * FROM a UNION SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a UNION ALL SELECT * FROM b", + read={ + "bigquery": "SELECT * FROM a UNION ALL SELECT * FROM b", + "duckdb": "SELECT * FROM a UNION ALL SELECT * FROM b", + "presto": "SELECT * FROM a UNION ALL SELECT * FROM b", + "spark": "SELECT * FROM a UNION ALL SELECT * FROM b", + }, + write={ + "bigquery": "SELECT * FROM a UNION ALL SELECT * FROM b", + "duckdb": "SELECT * FROM a UNION ALL SELECT * FROM b", + "presto": "SELECT * FROM a UNION ALL SELECT * FROM b", + "spark": "SELECT * FROM a UNION ALL SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a INTERSECT SELECT * FROM b", + read={ + "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", + "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", + "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", + }, + write={ + "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", + "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", + "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a EXCEPT SELECT * FROM b", + read={ + "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", + "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", + "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", + }, + write={ + "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", + "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", + "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a UNION DISTINCT SELECT * FROM b", + write={ + "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a UNION SELECT * FROM b", + "presto": "SELECT * FROM a UNION SELECT * FROM b", + "spark": "SELECT * FROM a UNION SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + write={ + "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b", + "presto": "SELECT * FROM a INTERSECT SELECT * FROM b", + "spark": "SELECT * FROM a INTERSECT SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + write={ + "bigquery": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + "duckdb": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + "presto": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + "spark": "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + write={ + "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b", + "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b", + "presto": "SELECT * FROM a EXCEPT SELECT * FROM b", + "spark": "SELECT * FROM a EXCEPT SELECT * FROM b", + }, + ) + self.validate_all( + "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + read={ + "bigquery": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + "duckdb": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + "presto": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + "spark": "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + }, + ) + + def test_operators(self): + self.validate_all( + "x ILIKE '%y'", + read={ + "clickhouse": "x ILIKE '%y'", + "duckdb": "x ILIKE '%y'", + "postgres": "x ILIKE '%y'", + "snowflake": "x ILIKE '%y'", + }, + write={ + "bigquery": "LOWER(x) LIKE '%y'", + "clickhouse": "x ILIKE '%y'", + "duckdb": "x ILIKE '%y'", + "hive": "LOWER(x) LIKE '%y'", + "mysql": "LOWER(x) LIKE '%y'", + "oracle": "LOWER(x) LIKE '%y'", + "postgres": "x ILIKE '%y'", + "presto": "LOWER(x) LIKE '%y'", + "snowflake": "x ILIKE '%y'", + "spark": "LOWER(x) LIKE '%y'", + "sqlite": "LOWER(x) LIKE '%y'", + "starrocks": "LOWER(x) LIKE '%y'", + "trino": "LOWER(x) LIKE '%y'", + }, + ) + self.validate_all( + "SELECT * FROM a ORDER BY col_a NULLS LAST", + write={ + "mysql": UnsupportedError, + "starrocks": UnsupportedError, + }, + ) + self.validate_all( + "STR_POSITION(x, 'a')", + write={ + "duckdb": "STRPOS(x, 'a')", + "presto": "STRPOS(x, 'a')", + "spark": "LOCATE('a', x)", + }, + ) + self.validate_all( + "CONCAT_WS('-', 'a', 'b')", + write={ + "duckdb": "CONCAT_WS('-', 'a', 'b')", + "presto": "ARRAY_JOIN(ARRAY['a', 'b'], '-')", + "hive": "CONCAT_WS('-', 'a', 'b')", + "spark": "CONCAT_WS('-', 'a', 'b')", + }, + ) + + self.validate_all( + "CONCAT_WS('-', x)", + write={ + "duckdb": "CONCAT_WS('-', x)", + "presto": "ARRAY_JOIN(x, '-')", + "hive": "CONCAT_WS('-', x)", + "spark": "CONCAT_WS('-', x)", + }, + ) + self.validate_all( + "IF(x > 1, 1, 0)", + write={ + "duckdb": "CASE WHEN x > 1 THEN 1 ELSE 0 END", + "presto": "IF(x > 1, 1, 0)", + "hive": "IF(x > 1, 1, 0)", + "spark": "IF(x > 1, 1, 0)", + "tableau": "IF x > 1 THEN 1 ELSE 0 END", + }, + ) + self.validate_all( + "CASE WHEN 1 THEN x ELSE 0 END", + write={ + "duckdb": "CASE WHEN 1 THEN x ELSE 0 END", + "presto": "CASE WHEN 1 THEN x ELSE 0 END", + "hive": "CASE WHEN 1 THEN x ELSE 0 END", + "spark": "CASE WHEN 1 THEN x ELSE 0 END", + "tableau": "CASE WHEN 1 THEN x ELSE 0 END", + }, + ) + self.validate_all( + "x[y]", + write={ + "duckdb": "x[y]", + "presto": "x[y]", + "hive": "x[y]", + "spark": "x[y]", + }, + ) + self.validate_all( + """'["x"]'""", + write={ + "duckdb": """'["x"]'""", + "presto": """'["x"]'""", + "hive": """'["x"]'""", + "spark": """'["x"]'""", + }, + ) + + self.validate_all( + 'true or null as "foo"', + write={ + "bigquery": "TRUE OR NULL AS `foo`", + "duckdb": 'TRUE OR NULL AS "foo"', + "presto": 'TRUE OR NULL AS "foo"', + "hive": "TRUE OR NULL AS `foo`", + "spark": "TRUE OR NULL AS `foo`", + }, + ) + self.validate_all( + "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) as foo FROM baz", + write={ + "bigquery": "SELECT CASE WHEN COALESCE(bar, 0) = 1 THEN TRUE ELSE FALSE END AS foo FROM baz", + "duckdb": "SELECT CASE WHEN COALESCE(bar, 0) = 1 THEN TRUE ELSE FALSE END AS foo FROM baz", + "presto": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz", + "hive": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz", + "spark": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz", + }, + ) + self.validate_all( + "LEVENSHTEIN(col1, col2)", + write={ + "duckdb": "LEVENSHTEIN(col1, col2)", + "presto": "LEVENSHTEIN_DISTANCE(col1, col2)", + "hive": "LEVENSHTEIN(col1, col2)", + "spark": "LEVENSHTEIN(col1, col2)", + }, + ) + self.validate_all( + "LEVENSHTEIN(coalesce(col1, col2), coalesce(col2, col1))", + write={ + "duckdb": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", + "presto": "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))", + "hive": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", + "spark": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", + }, + ) + self.validate_all( + "ARRAY_FILTER(the_array, x -> x > 0)", + write={ + "presto": "FILTER(the_array, x -> x > 0)", + "hive": "FILTER(the_array, x -> x > 0)", + "spark": "FILTER(the_array, x -> x > 0)", + }, + ) + self.validate_all( + "SELECT a AS b FROM x GROUP BY b", + write={ + "duckdb": "SELECT a AS b FROM x GROUP BY b", + "presto": "SELECT a AS b FROM x GROUP BY 1", + "hive": "SELECT a AS b FROM x GROUP BY 1", + "oracle": "SELECT a AS b FROM x GROUP BY 1", + "spark": "SELECT a AS b FROM x GROUP BY 1", + }, + ) + self.validate_all( + "SELECT x FROM y LIMIT 10", + write={ + "sqlite": "SELECT x FROM y LIMIT 10", + "oracle": "SELECT x FROM y FETCH FIRST 10 ROWS ONLY", + }, + ) + self.validate_all( + "SELECT x FROM y LIMIT 10 OFFSET 5", + write={ + "sqlite": "SELECT x FROM y LIMIT 10 OFFSET 5", + "oracle": "SELECT x FROM y OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", + }, + ) + self.validate_all( + "SELECT x FROM y OFFSET 10 FETCH FIRST 3 ROWS ONLY", + write={ + "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", + }, + ) + self.validate_all( + "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", + write={ + "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", + }, + ) + self.validate_all( + '"x" + "y"', + read={ + "clickhouse": '`x` + "y"', + "sqlite": '`x` + "y"', + }, + ) + self.validate_all( + "[1, 2]", + write={ + "bigquery": "[1, 2]", + "clickhouse": "[1, 2]", + }, + ) + self.validate_all( + "SELECT * FROM VALUES ('x'), ('y') AS t(z)", + write={ + "spark": "SELECT * FROM (VALUES ('x'), ('y')) AS t(z)", + }, + ) + self.validate_all( + "CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR, v2 VARCHAR2, nv NVARCHAR, nv2 NVARCHAR2)", + write={ + "hive": "CREATE TABLE t (c CHAR, nc CHAR, v1 STRING, v2 STRING, nv STRING, nv2 STRING)", + "oracle": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR2, v2 VARCHAR2, nv NVARCHAR2, nv2 NVARCHAR2)", + "postgres": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR, v2 VARCHAR, nv VARCHAR, nv2 VARCHAR)", + "sqlite": "CREATE TABLE t (c TEXT, nc TEXT, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)", + }, + ) + self.validate_all( + "POWER(1.2, 3.4)", + read={ + "hive": "pow(1.2, 3.4)", + "postgres": "power(1.2, 3.4)", + }, + ) + self.validate_all( + "CREATE INDEX my_idx ON tbl (a, b)", + read={ + "hive": "CREATE INDEX my_idx ON TABLE tbl (a, b)", + "sqlite": "CREATE INDEX my_idx ON tbl (a, b)", + }, + write={ + "hive": "CREATE INDEX my_idx ON TABLE tbl (a, b)", + "postgres": "CREATE INDEX my_idx ON tbl (a, b)", + "sqlite": "CREATE INDEX my_idx ON tbl (a, b)", + }, + ) + self.validate_all( + "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", + read={ + "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl (a, b)", + "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", + }, + write={ + "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl (a, b)", + "postgres": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", + "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)", + }, + ) + self.validate_all( + "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))", + write={ + "hive": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 STRING, c2 STRING(1024))", + "oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))", + "postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))", + "sqlite": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 TEXT, c2 TEXT(1024))", + }, + ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py new file mode 100644 index 0000000..501301f --- /dev/null +++ b/tests/dialects/test_duckdb.py @@ -0,0 +1,249 @@ +from tests.dialects.test_dialect import Validator + + +class TestDuckDB(Validator): + dialect = "duckdb" + + def test_time(self): + self.validate_all( + "EPOCH(x)", + read={ + "presto": "TO_UNIXTIME(x)", + }, + write={ + "bigquery": "TIME_TO_UNIX(x)", + "duckdb": "EPOCH(x)", + "presto": "TO_UNIXTIME(x)", + "spark": "UNIX_TIMESTAMP(x)", + }, + ) + self.validate_all( + "EPOCH_MS(x)", + write={ + "bigquery": "UNIX_TO_TIME(x / 1000)", + "duckdb": "TO_TIMESTAMP(CAST(x / 1000 AS BIGINT))", + "presto": "FROM_UNIXTIME(x / 1000)", + "spark": "FROM_UNIXTIME(x / 1000)", + }, + ) + self.validate_all( + "STRFTIME(x, '%y-%-m-%S')", + write={ + "bigquery": "TIME_TO_STR(x, '%y-%-m-%S')", + "duckdb": "STRFTIME(x, '%y-%-m-%S')", + "postgres": "TO_CHAR(x, 'YY-FMMM-SS')", + "presto": "DATE_FORMAT(x, '%y-%c-%S')", + "spark": "DATE_FORMAT(x, 'yy-M-ss')", + }, + ) + self.validate_all( + "STRFTIME(x, '%Y-%m-%d %H:%M:%S')", + write={ + "duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')", + "presto": "DATE_FORMAT(x, '%Y-%m-%d %H:%i:%S')", + "hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')", + }, + ) + self.validate_all( + "STRPTIME(x, '%y-%-m')", + write={ + "bigquery": "STR_TO_TIME(x, '%y-%-m')", + "duckdb": "STRPTIME(x, '%y-%-m')", + "presto": "DATE_PARSE(x, '%y-%c')", + "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy-M')) AS TIMESTAMP)", + "spark": "TO_TIMESTAMP(x, 'yy-M')", + }, + ) + self.validate_all( + "TO_TIMESTAMP(x)", + write={ + "duckdb": "CAST(x AS TIMESTAMP)", + "presto": "DATE_PARSE(x, '%Y-%m-%d %H:%i:%s')", + "hive": "CAST(x AS TIMESTAMP)", + }, + ) + + def test_duckdb(self): + self.validate_all( + "LIST_VALUE(0, 1, 2)", + write={ + "bigquery": "[0, 1, 2]", + "duckdb": "LIST_VALUE(0, 1, 2)", + "presto": "ARRAY[0, 1, 2]", + "spark": "ARRAY(0, 1, 2)", + }, + ) + self.validate_all( + "REGEXP_MATCHES(x, y)", + write={ + "duckdb": "REGEXP_MATCHES(x, y)", + "presto": "REGEXP_LIKE(x, y)", + "hive": "x RLIKE y", + "spark": "x RLIKE y", + }, + ) + self.validate_all( + "STR_SPLIT(x, 'a')", + write={ + "duckdb": "STR_SPLIT(x, 'a')", + "presto": "SPLIT(x, 'a')", + "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))", + "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))", + }, + ) + self.validate_all( + "STRING_TO_ARRAY(x, 'a')", + write={ + "duckdb": "STR_SPLIT(x, 'a')", + "presto": "SPLIT(x, 'a')", + "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))", + "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))", + }, + ) + self.validate_all( + "STR_SPLIT_REGEX(x, 'a')", + write={ + "duckdb": "STR_SPLIT_REGEX(x, 'a')", + "presto": "REGEXP_SPLIT(x, 'a')", + "hive": "SPLIT(x, 'a')", + "spark": "SPLIT(x, 'a')", + }, + ) + self.validate_all( + "STRUCT_EXTRACT(x, 'abc')", + write={ + "duckdb": "STRUCT_EXTRACT(x, 'abc')", + "presto": 'x."abc"', + "hive": "x.`abc`", + "spark": "x.`abc`", + }, + ) + self.validate_all( + "STRUCT_EXTRACT(STRUCT_EXTRACT(x, 'y'), 'abc')", + write={ + "duckdb": "STRUCT_EXTRACT(STRUCT_EXTRACT(x, 'y'), 'abc')", + "presto": 'x."y"."abc"', + "hive": "x.`y`.`abc`", + "spark": "x.`y`.`abc`", + }, + ) + + self.validate_all( + "QUANTILE(x, 0.5)", + write={ + "duckdb": "QUANTILE(x, 0.5)", + "presto": "APPROX_PERCENTILE(x, 0.5)", + "hive": "PERCENTILE(x, 0.5)", + "spark": "PERCENTILE(x, 0.5)", + }, + ) + + self.validate_all( + "CAST(x AS DATE)", + write={ + "duckdb": "CAST(x AS DATE)", + "": "CAST(x AS DATE)", + }, + ) + self.validate_all( + "UNNEST(x)", + read={ + "spark": "EXPLODE(x)", + }, + write={ + "duckdb": "UNNEST(x)", + "spark": "EXPLODE(x)", + }, + ) + + self.validate_all( + "1d", + write={ + "duckdb": "1 AS d", + "spark": "1 AS d", + }, + ) + self.validate_all( + "CAST(1 AS DOUBLE)", + read={ + "hive": "1d", + "spark": "1d", + }, + ) + self.validate_all( + "POWER(CAST(2 AS SMALLINT), 3)", + read={ + "hive": "POW(2S, 3)", + "spark": "POW(2S, 3)", + }, + ) + self.validate_all( + "LIST_SUM(LIST_VALUE(1, 2))", + read={ + "spark": "ARRAY_SUM(ARRAY(1, 2))", + }, + ) + self.validate_all( + "IF(y <> 0, x / y, NULL)", + read={ + "bigquery": "SAFE_DIVIDE(x, y)", + }, + ) + self.validate_all( + "STRUCT_PACK(x := 1, y := '2')", + write={ + "duckdb": "STRUCT_PACK(x := 1, y := '2')", + "spark": "STRUCT(x = 1, y = '2')", + }, + ) + self.validate_all( + "ARRAY_SORT(x)", + write={ + "duckdb": "ARRAY_SORT(x)", + "presto": "ARRAY_SORT(x)", + "hive": "SORT_ARRAY(x)", + "spark": "SORT_ARRAY(x)", + }, + ) + self.validate_all( + "ARRAY_REVERSE_SORT(x)", + write={ + "duckdb": "ARRAY_REVERSE_SORT(x)", + "presto": "ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)", + "hive": "SORT_ARRAY(x, FALSE)", + "spark": "SORT_ARRAY(x, FALSE)", + }, + ) + self.validate_all( + "LIST_REVERSE_SORT(x)", + write={ + "duckdb": "ARRAY_REVERSE_SORT(x)", + "presto": "ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)", + "hive": "SORT_ARRAY(x, FALSE)", + "spark": "SORT_ARRAY(x, FALSE)", + }, + ) + self.validate_all( + "LIST_SORT(x)", + write={ + "duckdb": "ARRAY_SORT(x)", + "presto": "ARRAY_SORT(x)", + "hive": "SORT_ARRAY(x)", + "spark": "SORT_ARRAY(x)", + }, + ) + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + }, + ) + self.validate_all( + "MONTH('2021-03-01')", + write={ + "duckdb": "MONTH('2021-03-01')", + "presto": "MONTH('2021-03-01')", + "hive": "MONTH('2021-03-01')", + "spark": "MONTH('2021-03-01')", + }, + ) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py new file mode 100644 index 0000000..eccd75a --- /dev/null +++ b/tests/dialects/test_hive.py @@ -0,0 +1,541 @@ +from tests.dialects.test_dialect import Validator + + +class TestHive(Validator): + dialect = "hive" + + def test_bits(self): + self.validate_all( + "x & 1", + write={ + "duckdb": "x & 1", + "presto": "BITWISE_AND(x, 1)", + "hive": "x & 1", + "spark": "x & 1", + }, + ) + self.validate_all( + "~x", + write={ + "duckdb": "~x", + "presto": "BITWISE_NOT(x)", + "hive": "~x", + "spark": "~x", + }, + ) + self.validate_all( + "x | 1", + write={ + "duckdb": "x | 1", + "presto": "BITWISE_OR(x, 1)", + "hive": "x | 1", + "spark": "x | 1", + }, + ) + self.validate_all( + "x << 1", + read={ + "spark": "SHIFTLEFT(x, 1)", + }, + write={ + "duckdb": "x << 1", + "presto": "BITWISE_ARITHMETIC_SHIFT_LEFT(x, 1)", + "hive": "x << 1", + "spark": "SHIFTLEFT(x, 1)", + }, + ) + self.validate_all( + "x >> 1", + read={ + "spark": "SHIFTRIGHT(x, 1)", + }, + write={ + "duckdb": "x >> 1", + "presto": "BITWISE_ARITHMETIC_SHIFT_RIGHT(x, 1)", + "hive": "x >> 1", + "spark": "SHIFTRIGHT(x, 1)", + }, + ) + self.validate_all( + "x & 1 > 0", + write={ + "duckdb": "x & 1 > 0", + "presto": "BITWISE_AND(x, 1) > 0", + "hive": "x & 1 > 0", + "spark": "x & 1 > 0", + }, + ) + + def test_cast(self): + self.validate_all( + "1s", + write={ + "duckdb": "CAST(1 AS SMALLINT)", + "presto": "CAST(1 AS SMALLINT)", + "hive": "CAST(1 AS SMALLINT)", + "spark": "CAST(1 AS SHORT)", + }, + ) + self.validate_all( + "1S", + write={ + "duckdb": "CAST(1 AS SMALLINT)", + "presto": "CAST(1 AS SMALLINT)", + "hive": "CAST(1 AS SMALLINT)", + "spark": "CAST(1 AS SHORT)", + }, + ) + self.validate_all( + "1Y", + write={ + "duckdb": "CAST(1 AS TINYINT)", + "presto": "CAST(1 AS TINYINT)", + "hive": "CAST(1 AS TINYINT)", + "spark": "CAST(1 AS BYTE)", + }, + ) + self.validate_all( + "1L", + write={ + "duckdb": "CAST(1 AS BIGINT)", + "presto": "CAST(1 AS BIGINT)", + "hive": "CAST(1 AS BIGINT)", + "spark": "CAST(1 AS LONG)", + }, + ) + self.validate_all( + "1.0bd", + write={ + "duckdb": "CAST(1.0 AS DECIMAL)", + "presto": "CAST(1.0 AS DECIMAL)", + "hive": "CAST(1.0 AS DECIMAL)", + "spark": "CAST(1.0 AS DECIMAL)", + }, + ) + self.validate_all( + "CAST(1 AS INT)", + read={ + "presto": "TRY_CAST(1 AS INT)", + }, + write={ + "duckdb": "TRY_CAST(1 AS INT)", + "presto": "TRY_CAST(1 AS INTEGER)", + "hive": "CAST(1 AS INT)", + "spark": "CAST(1 AS INT)", + }, + ) + + def test_ddl(self): + self.validate_all( + "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", + write={ + "presto": "CREATE TABLE test WITH (FORMAT = 'parquet', x = '1', Z = '2') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", + "spark": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1", + }, + ) + self.validate_all( + "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", + write={ + "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", + "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", + }, + ) + + def test_lateral_view(self): + self.validate_all( + "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b", + write={ + "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a) CROSS JOIN UNNEST(z) AS u(b)", + "hive": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b", + "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b", + }, + ) + self.validate_all( + "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)", + "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a", + "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a", + }, + ) + self.validate_all( + "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)", + "hive": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a", + "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a", + }, + ) + self.validate_all( + "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)", + "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a", + "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a", + }, + ) + + def test_quotes(self): + self.validate_all( + "'\\''", + write={ + "duckdb": "''''", + "presto": "''''", + "hive": "'\\''", + "spark": "'\\''", + }, + ) + self.validate_all( + "'\"x\"'", + write={ + "duckdb": "'\"x\"'", + "presto": "'\"x\"'", + "hive": "'\"x\"'", + "spark": "'\"x\"'", + }, + ) + self.validate_all( + "\"'x'\"", + write={ + "duckdb": "'''x'''", + "presto": "'''x'''", + "hive": "'\\'x\\''", + "spark": "'\\'x\\''", + }, + ) + self.validate_all( + "'\\\\a'", + read={ + "presto": "'\\a'", + }, + write={ + "duckdb": "'\\a'", + "presto": "'\\a'", + "hive": "'\\\\a'", + "spark": "'\\\\a'", + }, + ) + + def test_regex(self): + self.validate_all( + "a RLIKE 'x'", + write={ + "duckdb": "REGEXP_MATCHES(a, 'x')", + "presto": "REGEXP_LIKE(a, 'x')", + "hive": "a RLIKE 'x'", + "spark": "a RLIKE 'x'", + }, + ) + + self.validate_all( + "a REGEXP 'x'", + write={ + "duckdb": "REGEXP_MATCHES(a, 'x')", + "presto": "REGEXP_LIKE(a, 'x')", + "hive": "a RLIKE 'x'", + "spark": "a RLIKE 'x'", + }, + ) + + def test_time(self): + self.validate_all( + "DATEDIFF(a, b)", + write={ + "duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))", + "presto": "DATE_DIFF('day', CAST(SUBSTR(CAST(b AS VARCHAR), 1, 10) AS DATE), CAST(SUBSTR(CAST(a AS VARCHAR), 1, 10) AS DATE))", + "hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))", + "spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))", + "": "DATE_DIFF(TS_OR_DS_TO_DATE(a), TS_OR_DS_TO_DATE(b))", + }, + ) + self.validate_all( + """from_unixtime(x, "yyyy-MM-dd'T'HH")""", + write={ + "duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), '%Y-%m-%d''T''%H')", + "presto": "DATE_FORMAT(FROM_UNIXTIME(x), '%Y-%m-%d''T''%H')", + "hive": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')", + "spark": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')", + }, + ) + self.validate_all( + "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')", + write={ + "duckdb": "STRFTIME('2020-01-01', '%Y-%m-%d %H:%M:%S')", + "presto": "DATE_FORMAT('2020-01-01', '%Y-%m-%d %H:%i:%S')", + "hive": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')", + "spark": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')", + }, + ) + self.validate_all( + "DATE_ADD('2020-01-01', 1)", + write={ + "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY", + "presto": "DATE_ADD('DAY', 1, DATE_PARSE(SUBSTR('2020-01-01', 1, 10), '%Y-%m-%d'))", + "hive": "DATE_ADD('2020-01-01', 1)", + "spark": "DATE_ADD('2020-01-01', 1)", + "": "TS_OR_DS_ADD('2020-01-01', 1, 'DAY')", + }, + ) + self.validate_all( + "DATE_SUB('2020-01-01', 1)", + write={ + "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 * -1 DAY", + "presto": "DATE_ADD('DAY', 1 * -1, DATE_PARSE(SUBSTR('2020-01-01', 1, 10), '%Y-%m-%d'))", + "hive": "DATE_ADD('2020-01-01', 1 * -1)", + "spark": "DATE_ADD('2020-01-01', 1 * -1)", + "": "TS_OR_DS_ADD('2020-01-01', 1 * -1, 'DAY')", + }, + ) + self.validate_all( + "DATEDIFF(TO_DATE(y), x)", + write={ + "duckdb": "DATE_DIFF('day', CAST(x AS DATE), CAST(CAST(y AS DATE) AS DATE))", + "presto": "DATE_DIFF('day', CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE), CAST(SUBSTR(CAST(CAST(SUBSTR(CAST(y AS VARCHAR), 1, 10) AS DATE) AS VARCHAR), 1, 10) AS DATE))", + "hive": "DATEDIFF(TO_DATE(TO_DATE(y)), TO_DATE(x))", + "spark": "DATEDIFF(TO_DATE(TO_DATE(y)), TO_DATE(x))", + "": "DATE_DIFF(TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE(y)), TS_OR_DS_TO_DATE(x))", + }, + ) + self.validate_all( + "UNIX_TIMESTAMP(x)", + write={ + "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))", + "presto": "TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %H:%i:%S'))", + "hive": "UNIX_TIMESTAMP(x)", + "spark": "UNIX_TIMESTAMP(x)", + "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')", + }, + ) + + for unit in ("DAY", "MONTH", "YEAR"): + self.validate_all( + f"{unit}(x)", + write={ + "duckdb": f"{unit}(CAST(x AS DATE))", + "presto": f"{unit}(CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE))", + "hive": f"{unit}(TO_DATE(x))", + "spark": f"{unit}(TO_DATE(x))", + }, + ) + + def test_order_by(self): + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", + "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + }, + ) + + def test_hive(self): + self.validate_all( + "PERCENTILE(x, 0.5)", + write={ + "duckdb": "QUANTILE(x, 0.5)", + "presto": "APPROX_PERCENTILE(x, 0.5)", + "hive": "PERCENTILE(x, 0.5)", + "spark": "PERCENTILE(x, 0.5)", + }, + ) + self.validate_all( + "APPROX_COUNT_DISTINCT(a)", + write={ + "duckdb": "APPROX_COUNT_DISTINCT(a)", + "presto": "APPROX_DISTINCT(a)", + "hive": "APPROX_COUNT_DISTINCT(a)", + "spark": "APPROX_COUNT_DISTINCT(a)", + }, + ) + self.validate_all( + "ARRAY_CONTAINS(x, 1)", + write={ + "duckdb": "ARRAY_CONTAINS(x, 1)", + "presto": "CONTAINS(x, 1)", + "hive": "ARRAY_CONTAINS(x, 1)", + "spark": "ARRAY_CONTAINS(x, 1)", + }, + ) + self.validate_all( + "SIZE(x)", + write={ + "duckdb": "ARRAY_LENGTH(x)", + "presto": "CARDINALITY(x)", + "hive": "SIZE(x)", + "spark": "SIZE(x)", + }, + ) + self.validate_all( + "LOCATE('a', x)", + write={ + "duckdb": "STRPOS(x, 'a')", + "presto": "STRPOS(x, 'a')", + "hive": "LOCATE('a', x)", + "spark": "LOCATE('a', x)", + }, + ) + self.validate_all( + "LOCATE('a', x, 3)", + write={ + "duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", + "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", + "hive": "LOCATE('a', x, 3)", + "spark": "LOCATE('a', x, 3)", + }, + ) + self.validate_all( + "INITCAP('new york')", + write={ + "duckdb": "INITCAP('new york')", + "presto": "REGEXP_REPLACE('new york', '(\w)(\w*)', x -> UPPER(x[1]) || LOWER(x[2]))", + "hive": "INITCAP('new york')", + "spark": "INITCAP('new york')", + }, + ) + self.validate_all( + "SELECT * FROM x TABLESAMPLE(10) y", + write={ + "presto": "SELECT * FROM x AS y TABLESAMPLE(10)", + "hive": "SELECT * FROM x TABLESAMPLE(10) AS y", + "spark": "SELECT * FROM x TABLESAMPLE(10) AS y", + }, + ) + self.validate_all( + "SELECT SORT_ARRAY(x)", + write={ + "duckdb": "SELECT ARRAY_SORT(x)", + "presto": "SELECT ARRAY_SORT(x)", + "hive": "SELECT SORT_ARRAY(x)", + "spark": "SELECT SORT_ARRAY(x)", + }, + ) + self.validate_all( + "SELECT SORT_ARRAY(x, FALSE)", + read={ + "duckdb": "SELECT ARRAY_REVERSE_SORT(x)", + "spark": "SELECT SORT_ARRAY(x, FALSE)", + }, + write={ + "duckdb": "SELECT ARRAY_REVERSE_SORT(x)", + "presto": "SELECT ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)", + "hive": "SELECT SORT_ARRAY(x, FALSE)", + "spark": "SELECT SORT_ARRAY(x, FALSE)", + }, + ) + self.validate_all( + "GET_JSON_OBJECT(x, '$.name')", + write={ + "presto": "JSON_EXTRACT_SCALAR(x, '$.name')", + "hive": "GET_JSON_OBJECT(x, '$.name')", + "spark": "GET_JSON_OBJECT(x, '$.name')", + }, + ) + self.validate_all( + "MAP(a, b, c, d)", + write={ + "duckdb": "MAP(LIST_VALUE(a, c), LIST_VALUE(b, d))", + "presto": "MAP(ARRAY[a, c], ARRAY[b, d])", + "hive": "MAP(a, b, c, d)", + "spark": "MAP_FROM_ARRAYS(ARRAY(a, c), ARRAY(b, d))", + }, + ) + self.validate_all( + "MAP(a, b)", + write={ + "duckdb": "MAP(LIST_VALUE(a), LIST_VALUE(b))", + "presto": "MAP(ARRAY[a], ARRAY[b])", + "hive": "MAP(a, b)", + "spark": "MAP_FROM_ARRAYS(ARRAY(a), ARRAY(b))", + }, + ) + self.validate_all( + "LOG(10)", + write={ + "duckdb": "LN(10)", + "presto": "LN(10)", + "hive": "LN(10)", + "spark": "LN(10)", + }, + ) + self.validate_all( + "LOG(10, 2)", + write={ + "duckdb": "LOG(10, 2)", + "presto": "LOG(10, 2)", + "hive": "LOG(10, 2)", + "spark": "LOG(10, 2)", + }, + ) + self.validate_all( + 'ds = "2020-01-01"', + write={ + "duckdb": "ds = '2020-01-01'", + "presto": "ds = '2020-01-01'", + "hive": "ds = '2020-01-01'", + "spark": "ds = '2020-01-01'", + }, + ) + self.validate_all( + "ds = \"1''2\"", + write={ + "duckdb": "ds = '1''''2'", + "presto": "ds = '1''''2'", + "hive": "ds = '1\\'\\'2'", + "spark": "ds = '1\\'\\'2'", + }, + ) + self.validate_all( + "x == 1", + write={ + "duckdb": "x = 1", + "presto": "x = 1", + "hive": "x = 1", + "spark": "x = 1", + }, + ) + self.validate_all( + "x div y", + write={ + "duckdb": "CAST(x / y AS INT)", + "presto": "CAST(x / y AS INTEGER)", + "hive": "CAST(x / y AS INT)", + "spark": "CAST(x / y AS INT)", + }, + ) + self.validate_all( + "COLLECT_LIST(x)", + read={ + "presto": "ARRAY_AGG(x)", + }, + write={ + "duckdb": "ARRAY_AGG(x)", + "presto": "ARRAY_AGG(x)", + "hive": "COLLECT_LIST(x)", + "spark": "COLLECT_LIST(x)", + }, + ) + self.validate_all( + "COLLECT_SET(x)", + read={ + "presto": "SET_AGG(x)", + }, + write={ + "presto": "SET_AGG(x)", + "hive": "COLLECT_SET(x)", + "spark": "COLLECT_SET(x)", + }, + ) + self.validate_all( + "SELECT * FROM x TABLESAMPLE(1) AS foo", + read={ + "presto": "SELECT * FROM x AS foo TABLESAMPLE(1)", + }, + write={ + "presto": "SELECT * FROM x AS foo TABLESAMPLE(1)", + "hive": "SELECT * FROM x TABLESAMPLE(1) AS foo", + "spark": "SELECT * FROM x TABLESAMPLE(1) AS foo", + }, + ) diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py new file mode 100644 index 0000000..ee0c5f5 --- /dev/null +++ b/tests/dialects/test_mysql.py @@ -0,0 +1,79 @@ +from tests.dialects.test_dialect import Validator + + +class TestMySQL(Validator): + dialect = "mysql" + + def test_ddl(self): + self.validate_all( + "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", + write={ + "mysql": "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", + "spark": "CREATE TABLE z (a INT) COMMENT 'x'", + }, + ) + + def test_identity(self): + self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") + + def test_introducers(self): + self.validate_all( + "_utf8mb4 'hola'", + read={ + "mysql": "_utf8mb4'hola'", + }, + write={ + "mysql": "_utf8mb4 'hola'", + }, + ) + + def test_binary_literal(self): + self.validate_all( + "SELECT 0xCC", + write={ + "mysql": "SELECT b'11001100'", + "spark": "SELECT X'11001100'", + }, + ) + self.validate_all( + "SELECT 0xz", + write={ + "mysql": "SELECT `0xz`", + }, + ) + self.validate_all( + "SELECT 0XCC", + write={ + "mysql": "SELECT 0 AS XCC", + }, + ) + + def test_string_literals(self): + self.validate_all( + 'SELECT "2021-01-01" + INTERVAL 1 MONTH', + write={ + "mysql": "SELECT '2021-01-01' + INTERVAL 1 MONTH", + }, + ) + + def test_convert(self): + self.validate_all( + "CONVERT(x USING latin1)", + write={ + "mysql": "CAST(x AS CHAR CHARACTER SET latin1)", + }, + ) + self.validate_all( + "CAST(x AS CHAR CHARACTER SET latin1)", + write={ + "mysql": "CAST(x AS CHAR CHARACTER SET latin1)", + }, + ) + + def test_hash_comments(self): + self.validate_all( + "SELECT 1 # arbitrary content,,, until end-of-line", + write={ + "mysql": "SELECT 1", + }, + ) diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py new file mode 100644 index 0000000..15dbfd0 --- /dev/null +++ b/tests/dialects/test_postgres.py @@ -0,0 +1,93 @@ +from sqlglot import ParseError, transpile +from tests.dialects.test_dialect import Validator + + +class TestPostgres(Validator): + dialect = "postgres" + + def test_ddl(self): + self.validate_all( + "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)", + write={ + "postgres": "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)" + }, + ) + self.validate_all( + "CREATE TABLE products (product_no INT CONSTRAINT must_be_different UNIQUE, name TEXT CONSTRAINT present NOT NULL, price DECIMAL)", + write={ + "postgres": "CREATE TABLE products (product_no INT CONSTRAINT must_be_different UNIQUE, name TEXT CONSTRAINT present NOT NULL, price DECIMAL)" + }, + ) + self.validate_all( + "CREATE TABLE products (product_no INT, name TEXT, price DECIMAL, UNIQUE (product_no, name))", + write={ + "postgres": "CREATE TABLE products (product_no INT, name TEXT, price DECIMAL, UNIQUE (product_no, name))" + }, + ) + self.validate_all( + "CREATE TABLE products (" + "product_no INT UNIQUE," + " name TEXT," + " price DECIMAL CHECK (price > 0)," + " discounted_price DECIMAL CONSTRAINT positive_discount CHECK (discounted_price > 0)," + " CHECK (product_no > 1)," + " CONSTRAINT valid_discount CHECK (price > discounted_price))", + write={ + "postgres": "CREATE TABLE products (" + "product_no INT UNIQUE," + " name TEXT," + " price DECIMAL CHECK (price > 0)," + " discounted_price DECIMAL CONSTRAINT positive_discount CHECK (discounted_price > 0)," + " CHECK (product_no > 1)," + " CONSTRAINT valid_discount CHECK (price > discounted_price))" + }, + ) + + with self.assertRaises(ParseError): + transpile( + "CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres" + ) + with self.assertRaises(ParseError): + transpile( + "CREATE TABLE products (price DECIMAL, CHECK price > 1)", + read="postgres", + ) + + def test_postgres(self): + self.validate_all( + "CREATE TABLE x (a INT SERIAL)", + read={"sqlite": "CREATE TABLE x (a INTEGER AUTOINCREMENT)"}, + write={"sqlite": "CREATE TABLE x (a INTEGER AUTOINCREMENT)"}, + ) + self.validate_all( + "CREATE TABLE x (a UUID, b BYTEA)", + write={ + "presto": "CREATE TABLE x (a UUID, b VARBINARY)", + "hive": "CREATE TABLE x (a UUID, b BINARY)", + "spark": "CREATE TABLE x (a UUID, b BINARY)", + }, + ) + self.validate_all( + "SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS 1 PRECEDING)", + write={ + "postgres": "SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)", + }, + ) + self.validate_all( + "SELECT * FROM x FETCH 1 ROW", + write={ + "postgres": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", + "presto": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", + "hive": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", + "spark": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", + }, + ) + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname", + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", + "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + }, + ) diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py new file mode 100644 index 0000000..eb9aa5c --- /dev/null +++ b/tests/dialects/test_presto.py @@ -0,0 +1,422 @@ +from sqlglot import UnsupportedError +from tests.dialects.test_dialect import Validator + + +class TestPresto(Validator): + dialect = "presto" + + def test_cast(self): + self.validate_all( + "CAST(a AS ARRAY(INT))", + write={ + "bigquery": "CAST(a AS ARRAY<INT64>)", + "duckdb": "CAST(a AS ARRAY<INT>)", + "presto": "CAST(a AS ARRAY(INTEGER))", + "spark": "CAST(a AS ARRAY<INT>)", + }, + ) + self.validate_all( + "CAST(a AS VARCHAR)", + write={ + "bigquery": "CAST(a AS STRING)", + "duckdb": "CAST(a AS TEXT)", + "presto": "CAST(a AS VARCHAR)", + "spark": "CAST(a AS STRING)", + }, + ) + self.validate_all( + "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", + write={ + "bigquery": "CAST([1, 2] AS ARRAY<INT64>)", + "duckdb": "CAST(LIST_VALUE(1, 2) AS ARRAY<BIGINT>)", + "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", + "spark": "CAST(ARRAY(1, 2) AS ARRAY<LONG>)", + }, + ) + self.validate_all( + "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))", + write={ + "bigquery": "CAST(MAP([1], [1]) AS MAP<INT64, INT64>)", + "duckdb": "CAST(MAP(LIST_VALUE(1), LIST_VALUE(1)) AS MAP<INT, INT>)", + "presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))", + "hive": "CAST(MAP(1, 1) AS MAP<INT, INT>)", + "spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP<INT, INT>)", + }, + ) + self.validate_all( + "CAST(MAP(ARRAY['a','b','c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INT)))", + write={ + "bigquery": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP<STRING, ARRAY<INT64>>)", + "duckdb": "CAST(MAP(LIST_VALUE('a', 'b', 'c'), LIST_VALUE(LIST_VALUE(1), LIST_VALUE(2), LIST_VALUE(3))) AS MAP<TEXT, ARRAY<INT>>)", + "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))", + "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)", + "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)", + }, + ) + self.validate_all( + "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)", + write={ + "bigquery": "CAST(x AS TIMESTAMPTZ(9))", + "duckdb": "CAST(x AS TIMESTAMPTZ(9))", + "presto": "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)", + "hive": "CAST(x AS TIMESTAMPTZ(9))", + "spark": "CAST(x AS TIMESTAMPTZ(9))", + }, + ) + + def test_regex(self): + self.validate_all( + "REGEXP_LIKE(a, 'x')", + write={ + "duckdb": "REGEXP_MATCHES(a, 'x')", + "presto": "REGEXP_LIKE(a, 'x')", + "hive": "a RLIKE 'x'", + "spark": "a RLIKE 'x'", + }, + ) + self.validate_all( + "SPLIT(x, 'a.')", + write={ + "duckdb": "STR_SPLIT(x, 'a.')", + "presto": "SPLIT(x, 'a.')", + "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.'))", + "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.'))", + }, + ) + self.validate_all( + "REGEXP_SPLIT(x, 'a.')", + write={ + "duckdb": "STR_SPLIT_REGEX(x, 'a.')", + "presto": "REGEXP_SPLIT(x, 'a.')", + "hive": "SPLIT(x, 'a.')", + "spark": "SPLIT(x, 'a.')", + }, + ) + self.validate_all( + "CARDINALITY(x)", + write={ + "duckdb": "ARRAY_LENGTH(x)", + "presto": "CARDINALITY(x)", + "hive": "SIZE(x)", + "spark": "SIZE(x)", + }, + ) + + def test_time(self): + self.validate_all( + "DATE_FORMAT(x, '%Y-%m-%d %H:%i:%S')", + write={ + "duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')", + "presto": "DATE_FORMAT(x, '%Y-%m-%d %H:%i:%S')", + "hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')", + "spark": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')", + }, + ) + self.validate_all( + "DATE_PARSE(x, '%Y-%m-%d %H:%i:%S')", + write={ + "duckdb": "STRPTIME(x, '%Y-%m-%d %H:%M:%S')", + "presto": "DATE_PARSE(x, '%Y-%m-%d %H:%i:%S')", + "hive": "CAST(x AS TIMESTAMP)", + "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd HH:mm:ss')", + }, + ) + self.validate_all( + "DATE_PARSE(x, '%Y-%m-%d')", + write={ + "duckdb": "STRPTIME(x, '%Y-%m-%d')", + "presto": "DATE_PARSE(x, '%Y-%m-%d')", + "hive": "CAST(x AS TIMESTAMP)", + "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd')", + }, + ) + self.validate_all( + "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')", + write={ + "duckdb": "STRPTIME(SUBSTR(x, 1, 10), '%Y-%m-%d')", + "presto": "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')", + "hive": "CAST(SUBSTR(x, 1, 10) AS TIMESTAMP)", + "spark": "TO_TIMESTAMP(SUBSTR(x, 1, 10), 'yyyy-MM-dd')", + }, + ) + self.validate_all( + "FROM_UNIXTIME(x)", + write={ + "duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))", + "presto": "FROM_UNIXTIME(x)", + "hive": "FROM_UNIXTIME(x)", + "spark": "FROM_UNIXTIME(x)", + }, + ) + self.validate_all( + "TO_UNIXTIME(x)", + write={ + "duckdb": "EPOCH(x)", + "presto": "TO_UNIXTIME(x)", + "hive": "UNIX_TIMESTAMP(x)", + "spark": "UNIX_TIMESTAMP(x)", + }, + ) + self.validate_all( + "DATE_ADD('day', 1, x)", + write={ + "duckdb": "x + INTERVAL 1 day", + "presto": "DATE_ADD('day', 1, x)", + "hive": "DATE_ADD(x, 1)", + "spark": "DATE_ADD(x, 1)", + }, + ) + + def test_ddl(self): + self.validate_all( + "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", + write={ + "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", + "spark": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", + }, + ) + self.validate_all( + "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", + write={ + "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1", + "spark": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1", + }, + ) + self.validate_all( + "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + write={ + "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])", + "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", + "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", + }, + ) + self.validate_all( + "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", + write={ + "presto": "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", + "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y", + "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y", + }, + ) + self.validate_all( + "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", + write={ + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", + }, + ) + self.validate_all( + "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", + write={ + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", + }, + ) + + self.validate( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", + read="presto", + write="presto", + ) + + def test_quotes(self): + self.validate_all( + "''''", + write={ + "duckdb": "''''", + "presto": "''''", + "hive": "'\\''", + "spark": "'\\''", + }, + ) + self.validate_all( + "'x'", + write={ + "duckdb": "'x'", + "presto": "'x'", + "hive": "'x'", + "spark": "'x'", + }, + ) + self.validate_all( + "'''x'''", + write={ + "duckdb": "'''x'''", + "presto": "'''x'''", + "hive": "'\\'x\\''", + "spark": "'\\'x\\''", + }, + ) + self.validate_all( + "'''x'", + write={ + "duckdb": "'''x'", + "presto": "'''x'", + "hive": "'\\'x'", + "spark": "'\\'x'", + }, + ) + self.validate_all( + "x IN ('a', 'a''b')", + write={ + "duckdb": "x IN ('a', 'a''b')", + "presto": "x IN ('a', 'a''b')", + "hive": "x IN ('a', 'a\\'b')", + "spark": "x IN ('a', 'a\\'b')", + }, + ) + + def test_unnest(self): + self.validate_all( + "SELECT a FROM x CROSS JOIN UNNEST(ARRAY(y)) AS t (a)", + write={ + "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)", + "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a", + "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a", + }, + ) + + def test_presto(self): + self.validate_all( + 'SELECT a."b" FROM "foo"', + write={ + "duckdb": 'SELECT a."b" FROM "foo"', + "presto": 'SELECT a."b" FROM "foo"', + "spark": "SELECT a.`b` FROM `foo`", + }, + ) + self.validate_all( + "SELECT ARRAY[1, 2]", + write={ + "bigquery": "SELECT [1, 2]", + "duckdb": "SELECT LIST_VALUE(1, 2)", + "presto": "SELECT ARRAY[1, 2]", + "spark": "SELECT ARRAY(1, 2)", + }, + ) + self.validate_all( + "SELECT APPROX_DISTINCT(a) FROM foo", + write={ + "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "presto": "SELECT APPROX_DISTINCT(a) FROM foo", + "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + }, + ) + self.validate_all( + "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", + write={ + "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", + "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + }, + ) + self.validate_all( + "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", + write={ + "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", + "hive": UnsupportedError, + "spark": UnsupportedError, + }, + ) + self.validate_all( + "SELECT JSON_EXTRACT(x, '$.name')", + write={ + "presto": "SELECT JSON_EXTRACT(x, '$.name')", + "hive": "SELECT GET_JSON_OBJECT(x, '$.name')", + "spark": "SELECT GET_JSON_OBJECT(x, '$.name')", + }, + ) + self.validate_all( + "SELECT JSON_EXTRACT_SCALAR(x, '$.name')", + write={ + "presto": "SELECT JSON_EXTRACT_SCALAR(x, '$.name')", + "hive": "SELECT GET_JSON_OBJECT(x, '$.name')", + "spark": "SELECT GET_JSON_OBJECT(x, '$.name')", + }, + ) + self.validate_all( + "'\u6bdb'", + write={ + "presto": "'\u6bdb'", + "hive": "'\u6bdb'", + "spark": "'\u6bdb'", + }, + ) + self.validate_all( + "SELECT ARRAY_SORT(x, (left, right) -> -1)", + write={ + "duckdb": "SELECT ARRAY_SORT(x)", + "presto": "SELECT ARRAY_SORT(x, (left, right) -> -1)", + "hive": "SELECT SORT_ARRAY(x)", + "spark": "SELECT ARRAY_SORT(x, (left, right) -> -1)", + }, + ) + self.validate_all( + "SELECT ARRAY_SORT(x)", + write={ + "presto": "SELECT ARRAY_SORT(x)", + "hive": "SELECT SORT_ARRAY(x)", + "spark": "SELECT ARRAY_SORT(x)", + }, + ) + self.validate_all( + "SELECT ARRAY_SORT(x, (left, right) -> -1)", + write={ + "hive": UnsupportedError, + }, + ) + self.validate_all( + "MAP(a, b)", + write={ + "hive": UnsupportedError, + "spark": "MAP_FROM_ARRAYS(a, b)", + }, + ) + self.validate_all( + "MAP(ARRAY(a, b), ARRAY(c, d))", + write={ + "hive": "MAP(a, c, b, d)", + "presto": "MAP(ARRAY[a, b], ARRAY[c, d])", + "spark": "MAP_FROM_ARRAYS(ARRAY(a, b), ARRAY(c, d))", + }, + ) + self.validate_all( + "MAP(ARRAY('a'), ARRAY('b'))", + write={ + "hive": "MAP('a', 'b')", + "presto": "MAP(ARRAY['a'], ARRAY['b'])", + "spark": "MAP_FROM_ARRAYS(ARRAY('a'), ARRAY('b'))", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x", + write={ + "bigquery": "SELECT * FROM UNNEST(['7', '14'])", + "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x", + "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x", + "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)", + write={ + "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS y", + "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)", + "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)", + "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)", + }, + ) + self.validate_all( + "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n+1 FROM t WHERE n < 100 ) SELECT sum(n) FROM t", + write={ + "presto": "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n + 1 FROM t WHERE n < 100) SELECT SUM(n) FROM t", + "spark": UnsupportedError, + }, + ) diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py new file mode 100644 index 0000000..62f78e1 --- /dev/null +++ b/tests/dialects/test_snowflake.py @@ -0,0 +1,145 @@ +from sqlglot import UnsupportedError +from tests.dialects.test_dialect import Validator + + +class TestSnowflake(Validator): + dialect = "snowflake" + + def test_snowflake(self): + self.validate_all( + 'x:a:"b c"', + write={ + "duckdb": "x['a']['b c']", + "hive": "x['a']['b c']", + "presto": "x['a']['b c']", + "snowflake": "x['a']['b c']", + "spark": "x['a']['b c']", + }, + ) + self.validate_all( + "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", + write={ + "bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS LAST LIMIT 10", + "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", + }, + ) + self.validate_all( + "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z) = 1", + write={ + "bigquery": "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z NULLS LAST) = 1", + "snowflake": "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z) = 1", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP(1659981729)", + write={ + "bigquery": "SELECT UNIX_TO_TIME(1659981729)", + "snowflake": "SELECT TO_TIMESTAMP(1659981729)", + "spark": "SELECT FROM_UNIXTIME(1659981729)", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP(1659981729000, 3)", + write={ + "bigquery": "SELECT UNIX_TO_TIME(1659981729000, 'millis')", + "snowflake": "SELECT TO_TIMESTAMP(1659981729000, 3)", + "spark": "SELECT TIMESTAMP_MILLIS(1659981729000)", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP('1659981729')", + write={ + "bigquery": "SELECT UNIX_TO_TIME('1659981729')", + "snowflake": "SELECT TO_TIMESTAMP('1659981729')", + "spark": "SELECT FROM_UNIXTIME('1659981729')", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP(1659981729000000000, 9)", + write={ + "bigquery": "SELECT UNIX_TO_TIME(1659981729000000000, 'micros')", + "snowflake": "SELECT TO_TIMESTAMP(1659981729000000000, 9)", + "spark": "SELECT TIMESTAMP_MICROS(1659981729000000000)", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP('2013-04-05 01:02:03')", + write={ + "bigquery": "SELECT STR_TO_TIME('2013-04-05 01:02:03', '%Y-%m-%d %H:%M:%S')", + "snowflake": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-mm-dd hh24:mi:ss')", + "spark": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-MM-dd HH:mm:ss')", + }, + ) + self.validate_all( + "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss')", + read={ + "bigquery": "SELECT STR_TO_TIME('04/05/2013 01:02:03', '%m/%d/%Y %H:%M:%S')", + "duckdb": "SELECT STRPTIME('04/05/2013 01:02:03', '%m/%d/%Y %H:%M:%S')", + "snowflake": "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss')", + }, + write={ + "bigquery": "SELECT STR_TO_TIME('04/05/2013 01:02:03', '%m/%d/%Y %H:%M:%S')", + "snowflake": "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss')", + "spark": "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'MM/dd/yyyy HH:mm:ss')", + }, + ) + self.validate_all( + "SELECT IFF(TRUE, 'true', 'false')", + write={ + "snowflake": "SELECT IFF(TRUE, 'true', 'false')", + }, + ) + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname", + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname", + "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST", + "snowflake": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname", + }, + ) + self.validate_all( + "SELECT ARRAY_AGG(DISTINCT a)", + write={ + "spark": "SELECT COLLECT_LIST(DISTINCT a)", + "snowflake": "SELECT ARRAY_AGG(DISTINCT a)", + }, + ) + self.validate_all( + "SELECT * FROM a INTERSECT ALL SELECT * FROM b", + write={ + "snowflake": UnsupportedError, + }, + ) + self.validate_all( + "SELECT * FROM a EXCEPT ALL SELECT * FROM b", + write={ + "snowflake": UnsupportedError, + }, + ) + self.validate_all( + "SELECT ARRAY_UNION_AGG(a)", + write={ + "snowflake": "SELECT ARRAY_UNION_AGG(a)", + }, + ) + self.validate_all( + "SELECT NVL2(a, b, c)", + write={ + "snowflake": "SELECT NVL2(a, b, c)", + }, + ) + self.validate_all( + "SELECT $$a$$", + write={ + "snowflake": "SELECT 'a'", + }, + ) + self.validate_all( + r"SELECT $$a ' \ \t \x21 z $ $$", + write={ + "snowflake": r"SELECT 'a \' \\ \\t \\x21 z $ '", + }, + ) diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py new file mode 100644 index 0000000..8794fed --- /dev/null +++ b/tests/dialects/test_spark.py @@ -0,0 +1,226 @@ +from tests.dialects.test_dialect import Validator + + +class TestSpark(Validator): + dialect = "spark" + + def test_ddl(self): + self.validate_all( + "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)", + write={ + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", + }, + ) + self.validate_all( + "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:struct<nested_col_a:string, nested_col_b:string>>)", + write={ + "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", + "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", + "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", + }, + ) + self.validate_all( + "CREATE TABLE db.example_table (col_a array<int>, col_b array<array<int>>)", + write={ + "bigquery": "CREATE TABLE db.example_table (col_a ARRAY<INT64>, col_b ARRAY<ARRAY<INT64>>)", + "presto": "CREATE TABLE db.example_table (col_a ARRAY(INTEGER), col_b ARRAY(ARRAY(INTEGER)))", + "hive": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)", + "spark": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)", + }, + ) + self.validate_all( + "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", + write={ + "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY = ARRAY['MONTHS'])", + "hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", + "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", + }, + ) + self.validate_all( + "CREATE TABLE test STORED AS PARQUET AS SELECT 1", + write={ + "presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", + "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", + "spark": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", + }, + ) + self.validate_all( + "CREATE TABLE test USING ICEBERG STORED AS PARQUET AS SELECT 1", + write={ + "presto": "CREATE TABLE test WITH (TABLE_FORMAT = 'ICEBERG', FORMAT = 'PARQUET') AS SELECT 1", + "hive": "CREATE TABLE test USING ICEBERG STORED AS PARQUET AS SELECT 1", + "spark": "CREATE TABLE test USING ICEBERG STORED AS PARQUET AS SELECT 1", + }, + ) + self.validate_all( + """CREATE TABLE blah (col_a INT) COMMENT "Test comment: blah" PARTITIONED BY (date STRING) STORED AS ICEBERG TBLPROPERTIES('x' = '1')""", + write={ + "presto": """CREATE TABLE blah ( + col_a INTEGER, + date VARCHAR +) +COMMENT='Test comment: blah' +WITH ( + PARTITIONED_BY = ARRAY['date'], + FORMAT = 'ICEBERG', + x = '1' +)""", + "hive": """CREATE TABLE blah ( + col_a INT +) +COMMENT 'Test comment: blah' +PARTITIONED BY ( + date STRING +) +STORED AS ICEBERG +TBLPROPERTIES ( + 'x' = '1' +)""", + "spark": """CREATE TABLE blah ( + col_a INT +) +COMMENT 'Test comment: blah' +PARTITIONED BY ( + date STRING +) +STORED AS ICEBERG +TBLPROPERTIES ( + 'x' = '1' +)""", + }, + pretty=True, + ) + + def test_to_date(self): + self.validate_all( + "TO_DATE(x, 'yyyy-MM-dd')", + write={ + "duckdb": "CAST(x AS DATE)", + "hive": "TO_DATE(x)", + "presto": "CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE)", + "spark": "TO_DATE(x)", + }, + ) + self.validate_all( + "TO_DATE(x, 'yyyy')", + write={ + "duckdb": "CAST(STRPTIME(x, '%Y') AS DATE)", + "hive": "TO_DATE(x, 'yyyy')", + "presto": "CAST(DATE_PARSE(x, '%Y') AS DATE)", + "spark": "TO_DATE(x, 'yyyy')", + }, + ) + + def test_hint(self): + self.validate_all( + "SELECT /*+ COALESCE(3) */ * FROM x", + write={ + "spark": "SELECT /*+ COALESCE(3) */ * FROM x", + }, + ) + self.validate_all( + "SELECT /*+ COALESCE(3), REPARTITION(1) */ * FROM x", + write={ + "spark": "SELECT /*+ COALESCE(3), REPARTITION(1) */ * FROM x", + }, + ) + + def test_spark(self): + self.validate_all( + "ARRAY_SORT(x, (left, right) -> -1)", + write={ + "duckdb": "ARRAY_SORT(x)", + "presto": "ARRAY_SORT(x, (left, right) -> -1)", + "hive": "SORT_ARRAY(x)", + "spark": "ARRAY_SORT(x, (left, right) -> -1)", + }, + ) + self.validate_all( + "ARRAY(0, 1, 2)", + write={ + "bigquery": "[0, 1, 2]", + "duckdb": "LIST_VALUE(0, 1, 2)", + "presto": "ARRAY[0, 1, 2]", + "hive": "ARRAY(0, 1, 2)", + "spark": "ARRAY(0, 1, 2)", + }, + ) + + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", + "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname NULLS FIRST", + "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST", + "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "snowflake": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname, lname NULLS FIRST", + }, + ) + self.validate_all( + "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + write={ + "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "presto": "SELECT APPROX_DISTINCT(a) FROM foo", + "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", + }, + ) + self.validate_all( + "MONTH('2021-03-01')", + write={ + "duckdb": "MONTH(CAST('2021-03-01' AS DATE))", + "presto": "MONTH(CAST(SUBSTR(CAST('2021-03-01' AS VARCHAR), 1, 10) AS DATE))", + "hive": "MONTH(TO_DATE('2021-03-01'))", + "spark": "MONTH(TO_DATE('2021-03-01'))", + }, + ) + self.validate_all( + "YEAR('2021-03-01')", + write={ + "duckdb": "YEAR(CAST('2021-03-01' AS DATE))", + "presto": "YEAR(CAST(SUBSTR(CAST('2021-03-01' AS VARCHAR), 1, 10) AS DATE))", + "hive": "YEAR(TO_DATE('2021-03-01'))", + "spark": "YEAR(TO_DATE('2021-03-01'))", + }, + ) + self.validate_all( + "'\u6bdb'", + write={ + "duckdb": "'毛'", + "presto": "'毛'", + "hive": "'毛'", + "spark": "'毛'", + }, + ) + self.validate_all( + "SELECT LEFT(x, 2), RIGHT(x, 2)", + write={ + "duckdb": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - 2 + 1, 2)", + "presto": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - 2 + 1, 2)", + "hive": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - 2 + 1, 2)", + "spark": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - 2 + 1, 2)", + }, + ) + self.validate_all( + "MAP_FROM_ARRAYS(ARRAY(1), c)", + write={ + "duckdb": "MAP(LIST_VALUE(1), c)", + "presto": "MAP(ARRAY[1], c)", + "hive": "MAP(ARRAY(1), c)", + "spark": "MAP_FROM_ARRAYS(ARRAY(1), c)", + }, + ) + self.validate_all( + "SELECT ARRAY_SORT(x)", + write={ + "duckdb": "SELECT ARRAY_SORT(x)", + "presto": "SELECT ARRAY_SORT(x)", + "hive": "SELECT SORT_ARRAY(x)", + "spark": "SELECT ARRAY_SORT(x)", + }, + ) diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py new file mode 100644 index 0000000..a0576de --- /dev/null +++ b/tests/dialects/test_sqlite.py @@ -0,0 +1,72 @@ +from tests.dialects.test_dialect import Validator + + +class TestSQLite(Validator): + dialect = "sqlite" + + def test_ddl(self): + self.validate_all( + """ + CREATE TABLE "Track" + ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album" ("AlbumId") + ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT + ) + """, + write={ + "sqlite": """CREATE TABLE "Track" ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album"("AlbumId") ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT +)""", + }, + pretty=True, + ) + self.validate_all( + "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + read={ + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + }, + write={ + "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + }, + ) + self.validate_all( + """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""", + write={ + "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""", + "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)", + }, + ) + + def test_sqlite(self): + self.validate_all( + "SELECT CAST([a].[b] AS SMALLINT) FROM foo", + write={ + "sqlite": 'SELECT CAST("a"."b" AS INTEGER) FROM foo', + "spark": "SELECT CAST(`a`.`b` AS SHORT) FROM foo", + }, + ) + self.validate_all( + "EDITDIST3(col1, col2)", + read={ + "sqlite": "EDITDIST3(col1, col2)", + "spark": "LEVENSHTEIN(col1, col2)", + }, + write={ + "sqlite": "EDITDIST3(col1, col2)", + "spark": "LEVENSHTEIN(col1, col2)", + }, + ) + self.validate_all( + "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", + write={ + "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + "sqlite": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname", + }, + ) diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py new file mode 100644 index 0000000..1fe1a57 --- /dev/null +++ b/tests/dialects/test_starrocks.py @@ -0,0 +1,8 @@ +from tests.dialects.test_dialect import Validator + + +class TestMySQL(Validator): + dialect = "starrocks" + + def test_identity(self): + self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") diff --git a/tests/dialects/test_tableau.py b/tests/dialects/test_tableau.py new file mode 100644 index 0000000..0f612dd --- /dev/null +++ b/tests/dialects/test_tableau.py @@ -0,0 +1,62 @@ +from tests.dialects.test_dialect import Validator + + +class TestTableau(Validator): + dialect = "tableau" + + def test_tableau(self): + self.validate_all( + "IF x = 'a' THEN y ELSE NULL END", + read={ + "presto": "IF(x = 'a', y, NULL)", + }, + write={ + "presto": "IF(x = 'a', y, NULL)", + "hive": "IF(x = 'a', y, NULL)", + "tableau": "IF x = 'a' THEN y ELSE NULL END", + }, + ) + self.validate_all( + "IFNULL(a, 0)", + read={ + "presto": "COALESCE(a, 0)", + }, + write={ + "presto": "COALESCE(a, 0)", + "hive": "COALESCE(a, 0)", + "tableau": "IFNULL(a, 0)", + }, + ) + self.validate_all( + "COUNTD(a)", + read={ + "presto": "COUNT(DISTINCT a)", + }, + write={ + "presto": "COUNT(DISTINCT a)", + "hive": "COUNT(DISTINCT a)", + "tableau": "COUNTD(a)", + }, + ) + self.validate_all( + "COUNTD((a))", + read={ + "presto": "COUNT(DISTINCT(a))", + }, + write={ + "presto": "COUNT(DISTINCT (a))", + "hive": "COUNT(DISTINCT (a))", + "tableau": "COUNTD((a))", + }, + ) + self.validate_all( + "COUNT(a)", + read={ + "presto": "COUNT(a)", + }, + write={ + "presto": "COUNT(a)", + "hive": "COUNT(a)", + "tableau": "COUNT(a)", + }, + ) |