From 8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 8 Apr 2024 10:11:50 +0200 Subject: Adding upstream version 23.7.0. Signed-off-by: Daniel Baumann --- tests/dialects/test_athena.py | 20 +++++ tests/dialects/test_bigquery.py | 83 +++++++++++++----- tests/dialects/test_clickhouse.py | 28 +++++- tests/dialects/test_dialect.py | 117 +++++++++++++++++++++++-- tests/dialects/test_drill.py | 63 ++------------ tests/dialects/test_duckdb.py | 90 +++++++++++++++++--- tests/dialects/test_hive.py | 27 +++--- tests/dialects/test_mysql.py | 67 ++++++++++++++- tests/dialects/test_oracle.py | 68 +++++++++++++++ tests/dialects/test_postgres.py | 147 +++++++++++++++++++++----------- tests/dialects/test_presto.py | 30 ++++--- tests/dialects/test_prql.py | 17 ++++ tests/dialects/test_redshift.py | 38 ++++++--- tests/dialects/test_snowflake.py | 175 ++++++++++++++++++++++++++++++++++---- tests/dialects/test_spark.py | 37 +++++++- tests/dialects/test_sqlite.py | 132 ++++++++++++++++------------ tests/dialects/test_teradata.py | 5 +- tests/dialects/test_tsql.py | 29 ++++++- 18 files changed, 905 insertions(+), 268 deletions(-) create mode 100644 tests/dialects/test_athena.py create mode 100644 tests/dialects/test_prql.py (limited to 'tests/dialects') diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py new file mode 100644 index 0000000..3288ada --- /dev/null +++ b/tests/dialects/test_athena.py @@ -0,0 +1,20 @@ +from tests.dialects.test_dialect import Validator + + +class TestAthena(Validator): + dialect = "athena" + maxDiff = None + + def test_athena(self): + self.validate_identity( + """USING EXTERNAL FUNCTION some_function(input VARBINARY) + RETURNS VARCHAR + LAMBDA 'some-name' + SELECT + some_function(1)""", + check_command_warning=True, + ) + + self.validate_identity( + "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')" + ) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 0d94d19..300d492 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -51,6 +51,8 @@ class TestBigQuery(Validator): self.assertEqual(table.name, "_y") self.validate_identity("SELECT * FROM x-0.y") + self.assertEqual(exp.to_table("`a.b`.`c.d`", dialect="bigquery").sql(), '"a"."b"."c"."d"') + self.assertEqual(exp.to_table("`x`.`y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql("bigquery"), "`x.y.z`") self.assertEqual(exp.to_table("`x`.`y`", dialect="bigquery").sql("bigquery"), "`x`.`y`") @@ -58,6 +60,8 @@ class TestBigQuery(Validator): select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`") self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF") + self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text") + self.validate_identity("SELECT jsondoc['some_key']") self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`") self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`") self.validate_identity("CREATE OR REPLACE TABLE `a.b.c` CLONE `a.b.d`") @@ -177,6 +181,13 @@ class TestBigQuery(Validator): self.validate_identity( """SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')""" ) + self.validate_identity( + "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", + ) + self.validate_identity( + "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c", + "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c", + ) self.validate_identity( "SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)", "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)", @@ -185,10 +196,6 @@ class TestBigQuery(Validator): "select array_contains([1, 2, 3], 1)", "SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)", ) - self.validate_identity( - "create or replace view test (tenant_id OPTIONS(description='Test description on table creation')) select 1 as tenant_id, 1 as customer_id;", - "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", - ) self.validate_identity( "SELECT SPLIT(foo)", "SELECT SPLIT(foo, ',')", @@ -658,6 +665,13 @@ class TestBigQuery(Validator): "duckdb": "SELECT {'y': ARRAY(SELECT {'b': 1} FROM x)} FROM z", }, ) + self.validate_all( + "SELECT CAST(STRUCT(1) AS STRUCT)", + write={ + "bigquery": "SELECT CAST(STRUCT(1) AS STRUCT)", + "snowflake": "SELECT CAST(OBJECT_CONSTRUCT('_0', 1) AS OBJECT)", + }, + ) self.validate_all( "cast(x as date format 'MM/DD/YYYY')", write={ @@ -724,10 +738,10 @@ class TestBigQuery(Validator): self.validate_all( "'\\\\'", write={ - "bigquery": r"'\\'", - "duckdb": r"'\\'", - "presto": r"'\\'", - "hive": r"'\\'", + "bigquery": "'\\\\'", + "duckdb": "'\\'", + "presto": "'\\'", + "hive": "'\\\\'", }, ) self.validate_all( @@ -1004,20 +1018,28 @@ class TestBigQuery(Validator): }, ) self.validate_all( - "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) AS tab", read={ - "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])", + "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) as tab", "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)", }, ) self.validate_all( - "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", + "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", read={ - "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])", + "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", + "postgres": "SELECT * FROM (VALUES (1)) AS t1", + }, + ) + self.validate_all( + "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", + read={ + "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", "postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", }, ) + self.validate_all( "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table", write={ @@ -1050,28 +1072,43 @@ class TestBigQuery(Validator): ) self.validate_all( """SELECT - `u`.`harness_user_email` AS `harness_user_email`, - `d`.`harness_user_id` AS `harness_user_id`, - `harness_account_id` AS `harness_account_id` -FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`harness_cluster_details`) AS `d`, UNNEST(`d`.`harness_account_ids`) AS `harness_account_id` + `u`.`user_email` AS `user_email`, + `d`.`user_id` AS `user_id`, + `account_id` AS `account_id` +FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`cluster_details`) AS `d`, UNNEST(`d`.`account_ids`) AS `account_id` WHERE - NOT `harness_account_id` IS NULL""", + NOT `account_id` IS NULL""", read={ "": """ SELECT - "u"."harness_user_email" AS "harness_user_email", - "_q_0"."d"."harness_user_id" AS "harness_user_id", - "_q_1"."harness_account_id" AS "harness_account_id" + "u"."user_email" AS "user_email", + "_q_0"."d"."user_id" AS "user_id", + "_q_1"."account_id" AS "account_id" FROM "analytics_staging"."stg_mongodb__users" AS "u", - UNNEST("u"."harness_cluster_details") AS "_q_0"("d"), - UNNEST("_q_0"."d"."harness_account_ids") AS "_q_1"("harness_account_id") + UNNEST("u"."cluster_details") AS "_q_0"("d"), + UNNEST("_q_0"."d"."account_ids") AS "_q_1"("account_id") WHERE - NOT "_q_1"."harness_account_id" IS NULL + NOT "_q_1"."account_id" IS NULL """ }, pretty=True, ) + self.validate_all( + "SELECT MOD(x, 10)", + read={"postgres": "SELECT x % 10"}, + write={ + "bigquery": "SELECT MOD(x, 10)", + "postgres": "SELECT x % 10", + }, + ) + self.validate_all( + "SELECT CAST(x AS DATETIME)", + write={ + "": "SELECT CAST(x AS TIMESTAMP)", + "bigquery": "SELECT CAST(x AS DATETIME)", + }, + ) def test_errors(self): with self.assertRaises(TokenError): diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index edf3da1..c5f9847 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -1,5 +1,6 @@ from sqlglot import exp, parse_one from tests.dialects.test_dialect import Validator +from sqlglot.errors import ErrorLevel class TestClickhouse(Validator): @@ -153,7 +154,9 @@ class TestClickhouse(Validator): self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster") self.validate_identity("TRUNCATE DATABASE db") self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster") - + self.validate_identity( + "CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)" + ) self.validate_all( "SELECT arrayJoin([1,2,3])", write={ @@ -390,6 +393,22 @@ class TestClickhouse(Validator): ) self.validate_identity("SYSTEM STOP MERGES foo.bar", check_command_warning=True) + self.validate_identity( + "INSERT INTO FUNCTION s3('url', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table" + ) + self.validate_identity( + "INSERT INTO FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')" + ) + self.validate_identity( + """INSERT INTO TABLE FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""", + """INSERT INTO FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""", + ) + + self.validate_identity("SELECT 1 FORMAT TabSeparated") + self.validate_identity("SELECT * FROM t FORMAT TabSeparated") + self.validate_identity("SELECT FORMAT") + self.validate_identity("1 AS FORMAT").assert_is(exp.Alias) + def test_cte(self): self.validate_identity("WITH 'x' AS foo SELECT foo") self.validate_identity("WITH ['c'] AS field_names SELECT field_names") @@ -401,6 +420,13 @@ class TestClickhouse(Validator): self.assertIsInstance(query.args["with"].expressions[0].this, exp.Subquery) self.assertEqual(query.args["with"].expressions[0].alias, "y") + query = "WITH 1 AS var SELECT var" + for error_level in [ErrorLevel.IGNORE, ErrorLevel.RAISE, ErrorLevel.IMMEDIATE]: + self.assertEqual( + self.parse_one(query, error_level=error_level).sql(dialect=self.dialect), + query, + ) + def test_ternary(self): self.validate_all("x ? 1 : 2", write={"clickhouse": "CASE WHEN x THEN 1 ELSE 2 END"}) self.validate_all( diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 5faed51..76ab94b 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -17,8 +17,8 @@ from sqlglot.parser import logger as parser_logger class Validator(unittest.TestCase): dialect = None - def parse_one(self, sql): - return parse_one(sql, read=self.dialect) + def parse_one(self, sql, **kwargs): + return parse_one(sql, read=self.dialect, **kwargs) def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False): if check_command_warning: @@ -611,7 +611,7 @@ class TestDialect(Validator): write={ "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))", "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", - "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d'))", + "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(CAST('2020-01-01' AS VARCHAR), 'yyyy-MM-dd')))", "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", "doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", }, @@ -700,7 +700,7 @@ class TestDialect(Validator): "hive": "TO_DATE(x)", "postgres": "CAST(x AS DATE)", "presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)", - "snowflake": "CAST(x AS DATE)", + "snowflake": "TO_DATE(x)", "doris": "TO_DATE(x)", "mysql": "DATE(x)", }, @@ -961,6 +961,7 @@ class TestDialect(Validator): "presto": "CAST(x AS DATE)", "spark": "CAST(x AS DATE)", "sqlite": "x", + "tsql": "CAST(x AS DATE)", }, ) self.validate_all( @@ -1509,7 +1510,7 @@ class TestDialect(Validator): "POSITION(needle, haystack, pos)", write={ "drill": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1", - "presto": "STRPOS(haystack, needle, pos)", + "presto": "STRPOS(SUBSTR(haystack, pos), needle) + pos - 1", "spark": "LOCATE(needle, haystack, pos)", "clickhouse": "position(haystack, needle, pos)", "snowflake": "POSITION(needle, haystack, pos)", @@ -1719,6 +1720,11 @@ class TestDialect(Validator): with self.subTest(f"{expression.__class__.__name__} {dialect} -> {expected}"): self.assertEqual(expected, expression.sql(dialect=dialect)) + self.assertEqual( + parse_one("CAST(x AS DECIMAL) / y", read="mysql").sql(dialect="postgres"), + "CAST(x AS DECIMAL) / NULLIF(y, 0)", + ) + def test_limit(self): self.validate_all( "SELECT * FROM data LIMIT 10, 20", @@ -2054,6 +2060,44 @@ SELECT ) def test_logarithm(self): + for base in (2, 10): + with self.subTest(f"Transpiling LOG base {base}"): + self.validate_all( + f"LOG({base}, a)", + read={ + "": f"LOG{base}(a)", + "bigquery": f"LOG{base}(a)", + "clickhouse": f"LOG{base}(a)", + "databricks": f"LOG{base}(a)", + "duckdb": f"LOG{base}(a)", + "mysql": f"LOG{base}(a)", + "postgres": f"LOG{base}(a)", + "presto": f"LOG{base}(a)", + "spark": f"LOG{base}(a)", + "sqlite": f"LOG{base}(a)", + "trino": f"LOG{base}(a)", + "tsql": f"LOG{base}(a)", + }, + write={ + "bigquery": f"LOG(a, {base})", + "clickhouse": f"LOG{base}(a)", + "duckdb": f"LOG({base}, a)", + "mysql": f"LOG({base}, a)", + "oracle": f"LOG({base}, a)", + "postgres": f"LOG({base}, a)", + "presto": f"LOG{base}(a)", + "redshift": f"LOG({base}, a)", + "snowflake": f"LOG({base}, a)", + "spark2": f"LOG({base}, a)", + "spark": f"LOG({base}, a)", + "sqlite": f"LOG({base}, a)", + "starrocks": f"LOG({base}, a)", + "tableau": f"LOG(a, {base})", + "trino": f"LOG({base}, a)", + "tsql": f"LOG(a, {base})", + }, + ) + self.validate_all( "LOG(x)", read={ @@ -2082,6 +2126,7 @@ SELECT "bigquery": "LOG(n, b)", "databricks": "LOG(b, n)", "drill": "LOG(b, n)", + "duckdb": "LOG(b, n)", "hive": "LOG(b, n)", "mysql": "LOG(b, n)", "oracle": "LOG(b, n)", @@ -2089,8 +2134,13 @@ SELECT "snowflake": "LOG(b, n)", "spark": "LOG(b, n)", "sqlite": "LOG(b, n)", + "trino": "LOG(b, n)", "tsql": "LOG(n, b)", }, + write={ + "clickhouse": UnsupportedError, + "presto": UnsupportedError, + }, ) def test_count_if(self): @@ -2190,7 +2240,28 @@ SELECT "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", write={ "duckdb": "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", - "tsql": "WITH t1(x) AS (SELECT 1), t2(y) AS (SELECT 2) SELECT * FROM (SELECT y AS y FROM t2) AS subq", + "tsql": "WITH t2(y) AS (SELECT 2), t1(x) AS (SELECT 1) SELECT * FROM (SELECT y AS y FROM t2) AS subq", + }, + ) + self.validate_all( + """ +WITH c AS ( + WITH b AS ( + WITH a1 AS ( + SELECT 1 + ), a2 AS ( + SELECT 2 + ) + SELECT * FROM a1, a2 + ) + SELECT * + FROM b +) +SELECT * +FROM c""", + write={ + "duckdb": "WITH c AS (WITH b AS (WITH a1 AS (SELECT 1), a2 AS (SELECT 2) SELECT * FROM a1, a2) SELECT * FROM b) SELECT * FROM c", + "hive": "WITH a1 AS (SELECT 1), a2 AS (SELECT 2), b AS (SELECT * FROM a1, a2), c AS (SELECT * FROM b) SELECT * FROM c", }, ) @@ -2312,3 +2383,37 @@ SELECT self.validate_identity("TRUNCATE TABLE db.schema.test") self.validate_identity("TRUNCATE TABLE IF EXISTS db.schema.test") self.validate_identity("TRUNCATE TABLE t1, t2, t3") + + def test_create_sequence(self): + self.validate_identity("CREATE SEQUENCE seq") + self.validate_identity( + "CREATE TEMPORARY SEQUENCE seq AS SMALLINT START WITH 3 INCREMENT BY 2 MINVALUE 1 MAXVALUE 10 CACHE 1 NO CYCLE OWNED BY table.col" + ) + self.validate_identity( + "CREATE SEQUENCE seq START WITH 1 NO MINVALUE NO MAXVALUE CYCLE NO CACHE" + ) + self.validate_identity("CREATE OR REPLACE TEMPORARY SEQUENCE seq INCREMENT BY 1 NO CYCLE") + self.validate_identity( + "CREATE OR REPLACE SEQUENCE IF NOT EXISTS seq COMMENT='test comment' ORDER" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=METADATA NOORDER NOKEEP SCALE EXTEND SHARD EXTEND SESSION" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=DATA ORDER KEEP NOSCALE NOSHARD GLOBAL" + ) + self.validate_identity( + "CREATE SEQUENCE schema.seq SHARING=DATA NOCACHE NOCYCLE SCALE NOEXTEND" + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE OWNED BY NONE""", + """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE""", + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq START 1""", + """CREATE TEMPORARY SEQUENCE seq START WITH 1""", + ) + self.validate_identity( + """CREATE TEMPORARY SEQUENCE seq START WITH = 1 INCREMENT BY = 2""", + """CREATE TEMPORARY SEQUENCE seq START WITH 1 INCREMENT BY 2""", + ) diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py index 41c02fb..634c247 100644 --- a/tests/dialects/test_drill.py +++ b/tests/dialects/test_drill.py @@ -5,68 +5,17 @@ class TestDrill(Validator): dialect = "drill" def test_drill(self): - self.validate_all( - "DATE_FORMAT(a, 'yyyy')", - write={"drill": "TO_CHAR(a, 'yyyy')"}, + self.validate_identity( + "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))" + ) + self.validate_identity( + "SELECT * FROM (SELECT * FROM t) PIVOT(avg(c1) AS ac1 FOR c2 IN ('V' AS v))", ) - def test_string_literals(self): self.validate_all( "SELECT '2021-01-01' + INTERVAL 1 MONTH", write={ + "drill": "SELECT '2021-01-01' + INTERVAL '1' MONTH", "mysql": "SELECT '2021-01-01' + INTERVAL '1' MONTH", }, ) - - def test_quotes(self): - self.validate_all( - "'\\''", - write={ - "duckdb": "''''", - "presto": "''''", - "hive": "'\\''", - "spark": "'\\''", - }, - ) - self.validate_all( - "'\"x\"'", - write={ - "duckdb": "'\"x\"'", - "presto": "'\"x\"'", - "hive": "'\"x\"'", - "spark": "'\"x\"'", - }, - ) - self.validate_all( - "'\\\\a'", - read={ - "presto": "'\\\\a'", - }, - write={ - "duckdb": "'\\\\a'", - "presto": "'\\\\a'", - "hive": "'\\\\a'", - "spark": "'\\\\a'", - }, - ) - - def test_table_function(self): - self.validate_all( - "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", - write={ - "drill": "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))", - }, - ) - - def test_validate_pivot(self): - self.validate_all( - "SELECT * FROM (SELECT education_level, salary, marital_status, " - "EXTRACT(year FROM age(birth_date)) age FROM cp.`employee.json`) PIVOT (avg(salary) AS " - "avg_salary, avg(age) AS avg_age FOR marital_status IN ('M' married, 'S' single))", - write={ - "drill": "SELECT * FROM (SELECT education_level, salary, marital_status, " - "EXTRACT(year FROM age(birth_date)) AS age FROM cp.`employee.json`) " - "PIVOT(avg(salary) AS avg_salary, avg(age) AS avg_age FOR marital_status " - "IN ('M' AS married, 'S' AS single))" - }, - ) diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 58d1f06..5a7e93e 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -1,5 +1,6 @@ from sqlglot import ErrorLevel, UnsupportedError, exp, parse_one, transpile from sqlglot.helper import logger as helper_logger +from sqlglot.optimizer.annotate_types import annotate_types from tests.dialects.test_dialect import Validator @@ -7,6 +8,31 @@ class TestDuckDB(Validator): dialect = "duckdb" def test_duckdb(self): + query = "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT t.col['b'] FROM _data, UNNEST(_data.col) AS t(col) WHERE t.col['a'] = 1" + expr = annotate_types(self.validate_identity(query)) + self.assertEqual( + expr.sql(dialect="bigquery"), + "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1", + ) + + self.validate_all( + "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + read={ + "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", + "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", + }, + ) + self.validate_all( + "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", + read={ + "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", + "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", + }, + write={ + "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", + "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", + }, + ) self.validate_all( 'STRUCT_PACK("a b" := 1)', write={ @@ -15,7 +41,25 @@ class TestDuckDB(Validator): "snowflake": "OBJECT_CONSTRUCT('a b', 1)", }, ) - + self.validate_all( + "ARRAY_TO_STRING(arr, delim)", + read={ + "bigquery": "ARRAY_TO_STRING(arr, delim)", + "postgres": "ARRAY_TO_STRING(arr, delim)", + "presto": "ARRAY_JOIN(arr, delim)", + "snowflake": "ARRAY_TO_STRING(arr, delim)", + "spark": "ARRAY_JOIN(arr, delim)", + }, + write={ + "bigquery": "ARRAY_TO_STRING(arr, delim)", + "duckdb": "ARRAY_TO_STRING(arr, delim)", + "postgres": "ARRAY_TO_STRING(arr, delim)", + "presto": "ARRAY_JOIN(arr, delim)", + "snowflake": "ARRAY_TO_STRING(arr, delim)", + "spark": "ARRAY_JOIN(arr, delim)", + "tsql": "STRING_AGG(arr, delim)", + }, + ) self.validate_all( "SELECT SUM(X) OVER (ORDER BY x)", write={ @@ -130,13 +174,6 @@ class TestDuckDB(Validator): "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"fruit": {"foo": "banana"}}'), 'fruit'), 'foo')""", }, ) - self.validate_all( - "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", - write={ - "bigquery": "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT (SELECT col.b FROM UNNEST(col) AS col WHERE col.a = 1) FROM _data", - "duckdb": "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data", - }, - ) self.validate_all( "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", read={ @@ -201,6 +238,9 @@ class TestDuckDB(Validator): parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" ) + self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])") + self.validate_identity("SELECT MAP {'x': 1}") + self.validate_identity("SELECT df1.*, df2.* FROM df1 POSITIONAL JOIN df2") self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)") self.validate_identity("MAKE_TIMESTAMP(1667810584123456)") self.validate_identity("SELECT EPOCH_MS(10) AS t") @@ -234,6 +274,18 @@ class TestDuckDB(Validator): """SELECT '{"foo": [1, 2, 3]}' -> 'foo' -> 0""", """SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""", ) + self.validate_identity( + "SELECT ($$hello)'world$$)", + "SELECT ('hello)''world')", + ) + self.validate_identity( + "SELECT $$foo$$", + "SELECT 'foo'", + ) + self.validate_identity( + "SELECT $tag$foo$tag$", + "SELECT 'foo'", + ) self.validate_identity( "JSON_EXTRACT(x, '$.family')", "x -> '$.family'", @@ -679,7 +731,19 @@ class TestDuckDB(Validator): }, ) self.validate_identity( - "[x.STRING_SPLIT(' ')[1] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]" + "[x.STRING_SPLIT(' ')[i] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]" + ) + self.validate_identity( + """SELECT LIST_VALUE(1)[i]""", + """SELECT ([1])[i]""", + ) + self.validate_identity( + """{'x': LIST_VALUE(1)[i]}""", + """{'x': ([1])[i]}""", + ) + self.validate_identity( + """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': LIST_VALUE(1, 2, 3)[i], 'f2': LIST_VALUE(1, 2, 3)[i]})""", + """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': ([1, 2, 3])[i], 'f2': ([1, 2, 3])[i]})""", ) self.assertEqual( @@ -689,8 +753,6 @@ class TestDuckDB(Validator): "WARNING:sqlglot:Applying array index offset (1)", "WARNING:sqlglot:Applying array index offset (1)", "WARNING:sqlglot:Applying array index offset (1)", - "WARNING:sqlglot:Applying array index offset (-1)", - "WARNING:sqlglot:Applying array index offset (1)", ], ) @@ -702,7 +764,7 @@ class TestDuckDB(Validator): "SELECT MAKE_DATE(2016, 12, 25)", read={"bigquery": "SELECT DATE(2016, 12, 25)"} ) self.validate_all( - "SELECT CAST(CAST('2016-12-25 23:59:59' AS DATETIME) AS DATE)", + "SELECT CAST(CAST('2016-12-25 23:59:59' AS TIMESTAMP) AS DATE)", read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"}, ) self.validate_all( @@ -724,7 +786,7 @@ class TestDuckDB(Validator): write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"}, ) self.validate_all( - "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) DAY) + (7 * INTERVAL (-5) DAY))) AS t1", + "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - (DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7 % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1", read={ "presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1", }, @@ -952,7 +1014,7 @@ class TestDuckDB(Validator): "hive": "CAST(COL AS ARRAY)", "spark": "CAST(COL AS ARRAY)", "postgres": "CAST(COL AS BIGINT[])", - "snowflake": "CAST(COL AS ARRAY)", + "snowflake": "CAST(COL AS ARRAY(BIGINT))", }, ) self.validate_all( diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index b892dd6..33294ee 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -235,15 +235,18 @@ class TestHive(Validator): }, ) self.validate_all( - "'\\\\a'", + "'\\\\\\\\a'", read={ + "drill": "'\\\\\\\\a'", + "duckdb": "'\\\\a'", "presto": "'\\\\a'", }, write={ + "drill": "'\\\\\\\\a'", "duckdb": "'\\\\a'", + "hive": "'\\\\\\\\a'", "presto": "'\\\\a'", - "hive": "'\\\\a'", - "spark": "'\\\\a'", + "spark": "'\\\\\\\\a'", }, ) @@ -369,7 +372,7 @@ class TestHive(Validator): "UNIX_TIMESTAMP(x)", write={ "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))", - "presto": "TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))", + "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))", "hive": "UNIX_TIMESTAMP(x)", "spark": "UNIX_TIMESTAMP(x)", "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')", @@ -563,7 +566,7 @@ class TestHive(Validator): "LOCATE('a', x, 3)", write={ "duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", - "presto": "STRPOS(x, 'a', 3)", + "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", "hive": "LOCATE('a', x, 3)", "spark": "LOCATE('a', x, 3)", }, @@ -653,15 +656,6 @@ class TestHive(Validator): "spark": "LN(10)", }, ) - self.validate_all( - "LOG(10, 2)", - write={ - "duckdb": "LOG(10, 2)", - "presto": "LOG(10, 2)", - "hive": "LOG(10, 2)", - "spark": "LOG(10, 2)", - }, - ) self.validate_all( 'ds = "2020-01-01"', write={ @@ -745,13 +739,12 @@ class TestHive(Validator): ) def test_escapes(self) -> None: - self.validate_identity("'\n'") + self.validate_identity("'\n'", "'\\n'") self.validate_identity("'\\n'") - self.validate_identity("'\\\n'") + self.validate_identity("'\\\n'", "'\\\\\\n'") self.validate_identity("'\\\\n'") self.validate_identity("''") self.validate_identity("'\\\\'") - self.validate_identity("'\\z'") self.validate_identity("'\\\\z'") def test_data_type(self): diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index 5f23c44..7a9d6bf 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -85,10 +85,17 @@ class TestMySQL(Validator): "ALTER TABLE test_table ALTER COLUMN test_column SET DATA TYPE LONGTEXT", "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT", ) + self.validate_identity( + "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT", + ) self.validate_identity( "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP) DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC", "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP() ON UPDATE CURRENT_TIMESTAMP()) DEFAULT CHARACTER SET=utf8 ROW_FORMAT=DYNAMIC", ) + self.validate_identity( + "CREATE TABLE `foo` (a VARCHAR(10), KEY idx_a (a DESC))", + "CREATE TABLE `foo` (a VARCHAR(10), INDEX idx_a (a DESC))", + ) self.validate_all( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", @@ -510,9 +517,8 @@ class TestMySQL(Validator): ) def test_mysql_time(self): - self.validate_identity("FROM_UNIXTIME(a, b)") - self.validate_identity("FROM_UNIXTIME(a, b, c)") self.validate_identity("TIME_STR_TO_UNIX(x)", "UNIX_TIMESTAMP(x)") + self.validate_identity("SELECT FROM_UNIXTIME(1711366265, '%Y %D %M')") self.validate_all( "SELECT TO_DAYS(x)", write={ @@ -578,6 +584,17 @@ class TestMySQL(Validator): self.validate_all( "STR_TO_DATE(x, '%Y-%m-%dT%T')", write={"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')"} ) + self.validate_all( + "SELECT FROM_UNIXTIME(col)", + read={ + "postgres": "SELECT TO_TIMESTAMP(col)", + }, + write={ + "mysql": "SELECT FROM_UNIXTIME(col)", + "postgres": "SELECT TO_TIMESTAMP(col)", + "redshift": "SELECT (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')", + }, + ) def test_mysql(self): self.validate_all( @@ -723,6 +740,52 @@ class TestMySQL(Validator): "postgres": "STRING_AGG(DISTINCT x, '' ORDER BY y DESC NULLS LAST)", }, ) + self.validate_all( + "GROUP_CONCAT(a, b, c SEPARATOR ',')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR ',')", + "sqlite": "GROUP_CONCAT(a || b || c, ',')", + "tsql": "STRING_AGG(CONCAT(a, b, c), ',')", + "postgres": "STRING_AGG(CONCAT(a, b, c), ',')", + "presto": "ARRAY_JOIN(ARRAY_AGG(CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR), CAST(c AS VARCHAR))), ',')", + }, + ) + self.validate_all( + "GROUP_CONCAT(a, b, c SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR '')", + "sqlite": "GROUP_CONCAT(a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '')", + "postgres": "STRING_AGG(CONCAT(a, b, c), '')", + }, + ) + self.validate_all( + "GROUP_CONCAT(DISTINCT a, b, c SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) SEPARATOR '')", + "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '')", + "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '')", + }, + ) + self.validate_all( + "GROUP_CONCAT(a, b, c ORDER BY d SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(CONCAT(a, b, c) ORDER BY d SEPARATOR '')", + "sqlite": "GROUP_CONCAT(a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", + "postgres": "STRING_AGG(CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)", + }, + ) + self.validate_all( + "GROUP_CONCAT(DISTINCT a, b, c ORDER BY d SEPARATOR '')", + write={ + "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) ORDER BY d SEPARATOR '')", + "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", + "tsql": "STRING_AGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", + "postgres": "STRING_AGG(DISTINCT CONCAT(a, b, c), '' ORDER BY d NULLS FIRST)", + }, + ) self.validate_identity( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 9438507..526b0b5 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -94,7 +94,20 @@ class TestOracle(Validator): "SELECT * FROM t SAMPLE (0.25)", ) self.validate_identity("SELECT TO_CHAR(-100, 'L99', 'NL_CURRENCY = '' AusDollars '' ')") + self.validate_identity( + "SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2" + ) + self.validate_all( + "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + read={ + "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + }, + write={ + "oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", + }, + ) self.validate_all( "TO_CHAR(x)", write={ @@ -102,6 +115,59 @@ class TestOracle(Validator): "oracle": "TO_CHAR(x)", }, ) + self.validate_all( + "TO_NUMBER(expr, fmt, nlsparam)", + read={ + "teradata": "TO_NUMBER(expr, fmt, nlsparam)", + }, + write={ + "oracle": "TO_NUMBER(expr, fmt, nlsparam)", + "teradata": "TO_NUMBER(expr, fmt, nlsparam)", + }, + ) + self.validate_all( + "TO_NUMBER(x)", + write={ + "bigquery": "CAST(x AS FLOAT64)", + "doris": "CAST(x AS DOUBLE)", + "drill": "CAST(x AS DOUBLE)", + "duckdb": "CAST(x AS DOUBLE)", + "hive": "CAST(x AS DOUBLE)", + "mysql": "CAST(x AS DOUBLE)", + "oracle": "TO_NUMBER(x)", + "postgres": "CAST(x AS DOUBLE PRECISION)", + "presto": "CAST(x AS DOUBLE)", + "redshift": "CAST(x AS DOUBLE PRECISION)", + "snowflake": "TO_NUMBER(x)", + "spark": "CAST(x AS DOUBLE)", + "spark2": "CAST(x AS DOUBLE)", + "starrocks": "CAST(x AS DOUBLE)", + "tableau": "CAST(x AS DOUBLE)", + "teradata": "TO_NUMBER(x)", + }, + ) + self.validate_all( + "TO_NUMBER(x, fmt)", + read={ + "databricks": "TO_NUMBER(x, fmt)", + "drill": "TO_NUMBER(x, fmt)", + "postgres": "TO_NUMBER(x, fmt)", + "snowflake": "TO_NUMBER(x, fmt)", + "spark": "TO_NUMBER(x, fmt)", + "redshift": "TO_NUMBER(x, fmt)", + "teradata": "TO_NUMBER(x, fmt)", + }, + write={ + "databricks": "TO_NUMBER(x, fmt)", + "drill": "TO_NUMBER(x, fmt)", + "oracle": "TO_NUMBER(x, fmt)", + "postgres": "TO_NUMBER(x, fmt)", + "snowflake": "TO_NUMBER(x, fmt)", + "spark": "TO_NUMBER(x, fmt)", + "redshift": "TO_NUMBER(x, fmt)", + "teradata": "TO_NUMBER(x, fmt)", + }, + ) self.validate_all( "SELECT TO_CHAR(TIMESTAMP '1999-12-01 10:00:00')", write={ @@ -210,6 +276,8 @@ class TestOracle(Validator): self.validate_identity( "SELECT /*+ LEADING(e j) */ * FROM employees e, departments d, job_history j WHERE e.department_id = d.department_id AND e.hire_date = j.start_date" ) + self.validate_identity("INSERT /*+ APPEND */ INTO IAP_TBL (id, col1) VALUES (2, 'test2')") + self.validate_identity("INSERT /*+ APPEND_VALUES */ INTO dest_table VALUES (i, 'Value')") def test_xml_table(self): self.validate_identity("XMLTABLE('x')") diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 1d0ea8b..7a41cef 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -1,4 +1,4 @@ -from sqlglot import ParseError, UnsupportedError, exp, parse_one, transpile +from sqlglot import ParseError, UnsupportedError, exp, transpile from sqlglot.helper import logger as helper_logger from tests.dialects.test_dialect import Validator @@ -12,27 +12,12 @@ class TestPostgres(Validator): self.validate_identity("|/ x", "SQRT(x)") self.validate_identity("||/ x", "CBRT(x)") - expr = parse_one( - "SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)", read="postgres" - ) + expr = self.parse_one("SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)") unnest = expr.args["joins"][0].this.this unnest.assert_is(exp.Unnest) alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION""" - expr = parse_one(alter_table_only, read="postgres") - - # Checks that user-defined types are parsed into DataType instead of Identifier - parse_one("CREATE TABLE t (a udt)", read="postgres").this.expressions[0].args[ - "kind" - ].assert_is(exp.DataType) - - # Checks that OID is parsed into a DataType (ObjectIdentifier) - self.assertIsInstance( - parse_one("CREATE TABLE public.propertydata (propertyvalue oid)", read="postgres").find( - exp.DataType - ), - exp.ObjectIdentifier, - ) + expr = self.parse_one(alter_table_only) self.assertIsInstance(expr, exp.AlterTable) self.assertEqual(expr.sql(dialect="postgres"), alter_table_only) @@ -55,13 +40,6 @@ class TestPostgres(Validator): self.validate_identity("CAST(x AS DATEMULTIRANGE)") self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]") self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]") - self.validate_all( - "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", - write={ - "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))", - "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", - }, - ) self.validate_identity("x$") self.validate_identity("SELECT ARRAY[1, 2, 3]") self.validate_identity("SELECT ARRAY(SELECT 1)") @@ -85,6 +63,12 @@ class TestPostgres(Validator): self.validate_identity("EXEC AS myfunc @id = 123", check_command_warning=True) self.validate_identity("SELECT CURRENT_USER") self.validate_identity("SELECT * FROM ONLY t1") + self.validate_identity( + """UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)""" + ) + self.validate_identity( + "WITH t1 AS MATERIALIZED (SELECT 1), t2 AS NOT MATERIALIZED (SELECT 2) SELECT * FROM t1, t2" + ) self.validate_identity( """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)""" ) @@ -106,9 +90,6 @@ class TestPostgres(Validator): self.validate_identity( "SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)" ) - self.validate_identity( - "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)" - ) self.validate_identity( "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1) IN ('ab') THEN 1 ELSE 0 END" ) @@ -166,6 +147,10 @@ class TestPostgres(Validator): "SELECT $$Dianne's horse$$", "SELECT 'Dianne''s horse'", ) + self.validate_identity( + "SELECT $$The price is $9.95$$ AS msg", + "SELECT 'The price is $9.95' AS msg", + ) self.validate_identity( "COMMENT ON TABLE mytable IS $$doc this$$", "COMMENT ON TABLE mytable IS 'doc this'" ) @@ -328,6 +313,36 @@ class TestPostgres(Validator): ) self.validate_identity("SELECT * FROM t1*", "SELECT * FROM t1") + self.validate_all( + 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + write={ + "bigquery": "SELECT * FROM `test_table` ORDER BY RAND() NULLS LAST LIMIT 5", + "duckdb": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + "postgres": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', + "tsql": "SELECT TOP 5 * FROM [test_table] ORDER BY RAND()", + }, + ) + self.validate_all( + "SELECT (data -> 'en-US') AS acat FROM my_table", + write={ + "duckdb": """SELECT (data -> '$."en-US"') AS acat FROM my_table""", + "postgres": "SELECT (data -> 'en-US') AS acat FROM my_table", + }, + ) + self.validate_all( + "SELECT (data ->> 'en-US') AS acat FROM my_table", + write={ + "duckdb": """SELECT (data ->> '$."en-US"') AS acat FROM my_table""", + "postgres": "SELECT (data ->> 'en-US') AS acat FROM my_table", + }, + ) + self.validate_all( + "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", + write={ + "": "SELECT ARRAY_OVERLAPS(ARRAY(1, 2, 3), ARRAY(1, 2))", + "postgres": "SELECT ARRAY[1, 2, 3] && ARRAY[1, 2]", + }, + ) self.validate_all( "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", read={ @@ -517,15 +532,6 @@ class TestPostgres(Validator): "tsql": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", }, ) - self.validate_all( - "CREATE TABLE x (a UUID, b BYTEA)", - write={ - "duckdb": "CREATE TABLE x (a UUID, b BLOB)", - "presto": "CREATE TABLE x (a UUID, b VARBINARY)", - "hive": "CREATE TABLE x (a UUID, b BINARY)", - "spark": "CREATE TABLE x (a UUID, b BINARY)", - }, - ) self.validate_all( "SELECT * FROM x FETCH 1 ROW", write={ @@ -635,11 +641,30 @@ class TestPostgres(Validator): "postgres": "x / y ^ z", }, ) - - self.assertIsInstance(parse_one("id::UUID", read="postgres"), exp.Cast) + self.validate_all( + "CAST(x AS NAME)", + read={ + "redshift": "CAST(x AS NAME)", + }, + write={ + "postgres": "CAST(x AS NAME)", + "redshift": "CAST(x AS NAME)", + }, + ) + self.assertIsInstance(self.parse_one("id::UUID"), exp.Cast) def test_ddl(self): - expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres") + # Checks that user-defined types are parsed into DataType instead of Identifier + self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is( + exp.DataType + ) + + # Checks that OID is parsed into a DataType (ObjectIdentifier) + self.assertIsInstance( + self.parse_one("CREATE TABLE p.t (c oid)").find(exp.DataType), exp.ObjectIdentifier + ) + + expr = self.parse_one("CREATE TABLE t (x INTERVAL day)") cdef = expr.find(exp.ColumnDef) cdef.args["kind"].assert_is(exp.DataType) self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)") @@ -666,6 +691,21 @@ class TestPostgres(Validator): self.validate_identity("TRUNCATE TABLE t1 RESTRICT") self.validate_identity("TRUNCATE TABLE t1 CONTINUE IDENTITY CASCADE") self.validate_identity("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT") + self.validate_identity( + "CREATE TABLE t (vid INT NOT NULL, CONSTRAINT ht_vid_nid_fid_idx EXCLUDE (INT4RANGE(vid, nid) WITH &&, INT4RANGE(fid, fid, '[]') WITH &&))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, PRIMARY KEY (i), EXCLUDE USING gist(col varchar_pattern_ops DESC NULLS LAST WITH &&) WITH (sp1=1, sp2=2))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, EXCLUDE USING btree(INT4RANGE(vid, nid, '[]') ASC NULLS FIRST WITH &&) INCLUDE (col1, col2))" + ) + self.validate_identity( + "CREATE TABLE t (i INT, EXCLUDE USING gin(col1 WITH &&, col2 WITH ||) USING INDEX TABLESPACE tablespace WHERE (id > 5))" + ) + self.validate_identity( + "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)" + ) self.validate_identity( "CREATE TABLE cust_part3 PARTITION OF customers FOR VALUES WITH (MODULUS 3, REMAINDER 2)" ) @@ -691,13 +731,13 @@ class TestPostgres(Validator): "CREATE INDEX index_issues_on_title_trigram ON public.issues USING gin(title public.gin_trgm_ops)" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO NOTHING RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO NOTHING RETURNING *" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = 1 RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = 1 RETURNING *" ) self.validate_identity( - "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT (id) DO UPDATE SET x.id = excluded.id RETURNING *" + "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = excluded.id RETURNING *" ) self.validate_identity( "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT ON CONSTRAINT pkey DO NOTHING RETURNING *" @@ -740,8 +780,7 @@ class TestPostgres(Validator): check_command_warning=True, ) self.validate_identity( - "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp", - check_command_warning=True, + "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp" ) self.validate_identity( "CREATE FUNCTION x(INT) RETURNS INT SET search_path TO 'public'", @@ -793,7 +832,7 @@ class TestPostgres(Validator): ])) ) """, - "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY (ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))", + "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY(ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))", ) self.validate_identity( "CREATE INDEX index_ci_pipelines_on_project_idandrefandiddesc ON public.ci_pipelines USING btree(project_id, ref, id DESC)" @@ -803,6 +842,16 @@ class TestPostgres(Validator): "TRUNCATE TABLE ONLY t1, t2, ONLY t3, t4, t5 RESTART IDENTITY CASCADE", ) + self.validate_all( + "CREATE TABLE x (a UUID, b BYTEA)", + write={ + "duckdb": "CREATE TABLE x (a UUID, b BLOB)", + "presto": "CREATE TABLE x (a UUID, b VARBINARY)", + "hive": "CREATE TABLE x (a UUID, b BINARY)", + "spark": "CREATE TABLE x (a UUID, b BINARY)", + }, + ) + with self.assertRaises(ParseError): transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres") with self.assertRaises(ParseError): @@ -857,7 +906,7 @@ class TestPostgres(Validator): ) def test_operator(self): - expr = parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3", read="postgres") + expr = self.parse_one("1 OPERATOR(+) 2 OPERATOR(*) 3") expr.left.assert_is(exp.Operator) expr.left.left.assert_is(exp.Literal) @@ -926,8 +975,8 @@ class TestPostgres(Validator): def test_regexp_binary(self): """See https://github.com/tobymao/sqlglot/pull/2404 for details.""" - self.assertIsInstance(parse_one("'thomas' ~ '.*thomas.*'", read="postgres"), exp.Binary) - self.assertIsInstance(parse_one("'thomas' ~* '.*thomas.*'", read="postgres"), exp.Binary) + self.assertIsInstance(self.parse_one("'thomas' ~ '.*thomas.*'"), exp.Binary) + self.assertIsInstance(self.parse_one("'thomas' ~* '.*thomas.*'"), exp.Binary) def test_unnest_json_array(self): trino_input = """ diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 2ea595e..2162499 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -63,7 +63,7 @@ class TestPresto(Validator): "duckdb": "CAST(a AS INT[])", "presto": "CAST(a AS ARRAY(INTEGER))", "spark": "CAST(a AS ARRAY)", - "snowflake": "CAST(a AS ARRAY)", + "snowflake": "CAST(a AS ARRAY(INT))", }, ) self.validate_all( @@ -82,18 +82,17 @@ class TestPresto(Validator): "duckdb": "CAST([1, 2] AS BIGINT[])", "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", "spark": "CAST(ARRAY(1, 2) AS ARRAY)", - "snowflake": "CAST([1, 2] AS ARRAY)", + "snowflake": "CAST([1, 2] AS ARRAY(BIGINT))", }, ) self.validate_all( - "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))", + "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INT))", write={ - "bigquery": "CAST(MAP([1], [1]) AS MAP)", - "duckdb": "CAST(MAP([1], [1]) AS MAP(INT, INT))", - "presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))", - "hive": "CAST(MAP(1, 1) AS MAP)", - "spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP)", - "snowflake": "CAST(OBJECT_CONSTRUCT(1, 1) AS OBJECT)", + "duckdb": "CAST(MAP(['key'], [1]) AS MAP(TEXT, INT))", + "presto": "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INTEGER))", + "hive": "CAST(MAP('key', 1) AS MAP)", + "snowflake": "CAST(OBJECT_CONSTRUCT('key', 1) AS MAP(VARCHAR, INT))", + "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('key'), ARRAY(1)) AS MAP)", }, ) self.validate_all( @@ -104,7 +103,7 @@ class TestPresto(Validator): "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))", "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP>)", "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP>)", - "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS OBJECT)", + "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS MAP(VARCHAR, ARRAY(INT)))", }, ) self.validate_all( @@ -178,6 +177,17 @@ class TestPresto(Validator): "spark": "ARRAY_JOIN(x, '-', 'a')", }, ) + self.validate_all( + "STRPOS('ABC', 'A', 3)", + read={ + "trino": "STRPOS('ABC', 'A', 3)", + }, + write={ + "presto": "STRPOS('ABC', 'A', 3)", + "trino": "STRPOS('ABC', 'A', 3)", + "snowflake": "POSITION('A', 'ABC')", + }, + ) def test_interval_plural_to_singular(self): # Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals diff --git a/tests/dialects/test_prql.py b/tests/dialects/test_prql.py new file mode 100644 index 0000000..9a42d0c --- /dev/null +++ b/tests/dialects/test_prql.py @@ -0,0 +1,17 @@ +from tests.dialects.test_dialect import Validator + + +class TestPRQL(Validator): + dialect = "prql" + + def test_prql(self): + self.validate_identity("FROM x", "SELECT * FROM x") + self.validate_identity("FROM x DERIVE a + 1", "SELECT *, a + 1 FROM x") + self.validate_identity("FROM x DERIVE x = a + 1", "SELECT *, a + 1 AS x FROM x") + self.validate_identity("FROM x DERIVE {a + 1}", "SELECT *, a + 1 FROM x") + self.validate_identity("FROM x DERIVE {x = a + 1, b}", "SELECT *, a + 1 AS x, b FROM x") + self.validate_identity("FROM x TAKE 10", "SELECT * FROM x LIMIT 10") + self.validate_identity("FROM x TAKE 10 TAKE 5", "SELECT * FROM x LIMIT 5") + self.validate_identity( + "FROM x DERIVE {x = a + 1, b} SELECT {y = x, 2}", "SELECT a + 1 AS y, 2 FROM x" + ) diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index 506f429..a91f4f9 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -139,6 +139,15 @@ class TestRedshift(Validator): "presto": "LENGTH(x)", }, ) + self.validate_all( + "x LIKE 'abc' || '%'", + read={ + "duckdb": "STARTS_WITH(x, 'abc')", + }, + write={ + "redshift": "x LIKE 'abc' || '%'", + }, + ) self.validate_all( "SELECT SYSDATE", @@ -203,18 +212,6 @@ class TestRedshift(Validator): "redshift": "SELECT CAST('abc' AS CHAR)", }, ) - self.validate_all( - "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", - write={ - "redshift": "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid", - }, - ) - self.validate_all( - 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5', - write={ - "redshift": 'SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE \'%start\\_%\' LIMIT 5' - }, - ) self.validate_all( "SELECT DISTINCT ON (a) a, b FROM x ORDER BY c DESC", write={ @@ -293,6 +290,7 @@ class TestRedshift(Validator): ) def test_identity(self): + self.validate_identity("LISTAGG(DISTINCT foo, ', ')") self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") self.validate_identity("SELECT * FROM #x") @@ -305,6 +303,12 @@ class TestRedshift(Validator): self.validate_identity("CREATE TABLE datetable (start_date DATE, end_date DATE)") self.validate_identity("SELECT APPROXIMATE AS y") self.validate_identity("CREATE TABLE t (c BIGINT IDENTITY(0, 1))") + self.validate_identity( + "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid" + ) + self.validate_identity( + """SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE '%start\\\\_%' LIMIT 5""" + ) self.validate_identity( """SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)""" ) @@ -463,6 +467,10 @@ FROM ( "": "INSERT INTO t(a, b) SELECT a, b FROM (VALUES (1, 2), (3, 4)) AS t (a, b)", }, ) + self.validate_identity("CREATE TABLE table_backup BACKUP NO AS SELECT * FROM event") + self.validate_identity("CREATE TABLE table_backup BACKUP YES AS SELECT * FROM event") + self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP NO") + self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP YES") def test_create_table_like(self): self.validate_identity( @@ -499,7 +507,11 @@ FROM ( def test_varchar_max(self): self.validate_all( - "CREATE TABLE TEST (cola VARCHAR(MAX))", + 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', + read={ + "redshift": "CREATE TABLE TEST (cola VARCHAR(max))", + "tsql": "CREATE TABLE TEST (cola VARCHAR(max))", + }, write={ "redshift": 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', }, diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index e48f811..a41d35a 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -40,6 +40,19 @@ WHERE )""", ) + self.validate_identity("SELECT TIMEADD(HOUR, 2, CAST('09:05:03' AS TIME))") + self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT))") + self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS OBJECT(a CHAR NOT NULL))") + self.validate_identity("SELECT CAST([1, 2, 3] AS ARRAY(INT))") + self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR) RENAME FIELDS)") + self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR, y VARCHAR) ADD FIELDS)") + self.validate_identity("SELECT TO_TIMESTAMP(123.4)").selects[0].assert_is(exp.Anonymous) + self.validate_identity("SELECT TO_TIME(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_NTZ(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_LTZ(x) FROM t") + self.validate_identity("SELECT TO_TIMESTAMP_TZ(x) FROM t") + self.validate_identity("TO_DECIMAL(expr, fmt, precision, scale)") self.validate_identity("ALTER TABLE authors ADD CONSTRAINT c1 UNIQUE (id, email)") self.validate_identity("RM @parquet_stage", check_command_warning=True) self.validate_identity("REMOVE @parquet_stage", check_command_warning=True) @@ -59,7 +72,6 @@ WHERE self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')") self.validate_identity("CAST(x AS GEOMETRY)") self.validate_identity("OBJECT_CONSTRUCT(*)") - self.validate_identity("SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'") self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'") self.validate_identity("SELECT HLL(*)") self.validate_identity("SELECT HLL(a)") @@ -77,18 +89,29 @@ WHERE self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING") self.validate_identity("COMMENT IF EXISTS ON TABLE foo IS 'bar'") self.validate_identity("SELECT CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', col)") - self.validate_identity("REGEXP_REPLACE('target', 'pattern', '\n')") self.validate_identity("ALTER TABLE a SWAP WITH b") + self.validate_identity("SELECT MATCH_CONDITION") self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) self.validate_identity( "SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)" ) + self.validate_identity( + "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID" + ) + self.validate_identity( + "REGEXP_REPLACE('target', 'pattern', '\n')", + "REGEXP_REPLACE('target', 'pattern', '\\n')", + ) + self.validate_identity( + "SELECT a:from::STRING, a:from || ' test' ", + "SELECT CAST(GET_PATH(a, 'from') AS TEXT), GET_PATH(a, 'from') || ' test'", + ) self.validate_identity("x:from", "GET_PATH(x, 'from')") self.validate_identity( - "value:values::string", - "CAST(GET_PATH(value, 'values') AS TEXT)", + "value:values::string::int", + "CAST(CAST(GET_PATH(value, 'values') AS TEXT) AS INT)", ) self.validate_identity( """SELECT GET_PATH(PARSE_JSON('{"y": [{"z": 1}]}'), 'y[0]:z')""", @@ -132,7 +155,11 @@ WHERE ) self.validate_identity( "v:attr[0]:name", - "GET_PATH(GET_PATH(v, 'attr[0]'), 'name')", + "GET_PATH(v, 'attr[0].name')", + ) + self.validate_identity( + "a.x:from.b:c.d::int", + "CAST(GET_PATH(a.x, 'from.b.c.d') AS INT)", ) self.validate_identity( """SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""", @@ -189,10 +216,6 @@ WHERE "SELECT {fn CEILING(5.3)}", "SELECT CEIL(5.3)", ) - self.validate_identity( - "SELECT TO_TIMESTAMP(x) FROM t", - "SELECT CAST(x AS TIMESTAMPNTZ) FROM t", - ) self.validate_identity( "CAST(x AS BYTEINT)", "CAST(x AS INT)", @@ -380,6 +403,7 @@ WHERE write={ "duckdb": "{'a': b, 'c': d}", "snowflake": "OBJECT_CONSTRUCT('a', b, 'c', d)", + "": "STRUCT(b AS a, d AS c)", }, ) self.validate_identity("OBJECT_CONSTRUCT(a, b, c, d)") @@ -419,6 +443,46 @@ WHERE "sqlite": "SELECT MIN(c1), MIN(c2) FROM test", }, ) + for suffix in ( + "", + " OVER ()", + ): + self.validate_all( + f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + read={ + "snowflake": f"SELECT MEDIAN(x){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + write={ + "": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x NULLS LAST){suffix}", + "duckdb": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + ) + self.validate_all( + f"SELECT MEDIAN(x){suffix}", + write={ + "": f"SELECT PERCENTILE_CONT(x, 0.5){suffix}", + "duckdb": f"SELECT QUANTILE_CONT(x, 0.5){suffix}", + "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", + }, + ) + for func in ( + "CORR", + "COVAR_POP", + "COVAR_SAMP", + ): + self.validate_all( + f"SELECT {func}(y, x){suffix}", + write={ + "": f"SELECT {func}(y, x){suffix}", + "duckdb": f"SELECT {func}(y, x){suffix}", + "postgres": f"SELECT {func}(y, x){suffix}", + "snowflake": f"SELECT {func}(y, x){suffix}", + }, + ) self.validate_all( "TO_CHAR(x, y)", read={ @@ -560,9 +624,9 @@ WHERE self.validate_all( '''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''', write={ - "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""", - "mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""", - "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '["b c"]')""", + "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a."b c"'""", + "mysql": """SELECT JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a."b c"')""", + "snowflake": """SELECT GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a["b c"]')""", }, ) self.validate_all( @@ -623,9 +687,16 @@ WHERE self.validate_all( "SELECT TO_TIMESTAMP('2013-04-05 01:02:03')", write={ - "bigquery": "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2013-04-05 01:02:03')", - "snowflake": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-mm-DD hh24:mi:ss')", - "spark": "SELECT TO_TIMESTAMP('2013-04-05 01:02:03', 'yyyy-MM-dd HH:mm:ss')", + "bigquery": "SELECT CAST('2013-04-05 01:02:03' AS DATETIME)", + "snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMPNTZ)", + "spark": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)", + }, + ) + self.validate_all( + "SELECT TO_TIME('12:05:00')", + write={ + "bigquery": "SELECT CAST('12:05:00' AS TIME)", + "snowflake": "SELECT CAST('12:05:00' AS TIME)", }, ) self.validate_all( @@ -667,9 +738,13 @@ WHERE ) self.validate_all( "ARRAY_TO_STRING(x, '')", + read={ + "duckdb": "ARRAY_TO_STRING(x, '')", + }, write={ "spark": "ARRAY_JOIN(x, '')", "snowflake": "ARRAY_TO_STRING(x, '')", + "duckdb": "ARRAY_TO_STRING(x, '')", }, ) self.validate_all( @@ -930,6 +1005,9 @@ WHERE ) self.validate_all( "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", + read={ + "snowflake": "TIMESTAMPADD(DAY, 5, CAST('2008-12-25' AS DATE))", + }, write={ "bigquery": "DATE_ADD(CAST('2008-12-25' AS DATE), INTERVAL 5 DAY)", "snowflake": "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", @@ -952,6 +1030,46 @@ WHERE self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)") self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)") + self.validate_identity("TO_DATE('12345')").assert_is(exp.Anonymous) + + self.validate_identity( + "SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'", + "SELECT CAST('2019-02-28' AS DATE) + INTERVAL '1 day, 1 year'", + ) + + self.validate_identity("DATE(x)").assert_is(exp.Anonymous) + self.validate_identity("TO_DATE(x)").assert_is(exp.Anonymous) + self.validate_identity("TRY_TO_DATE(x)").assert_is(exp.Anonymous) + + self.validate_all( + "TO_DATE(x, 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE(x, 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME(x, '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "TO_DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + self.validate_all( + "TRY_TO_DATE('01-01-2000', 'MM-DD-YYYY')", + write={ + "snowflake": "TRY_TO_DATE('01-01-2000', 'mm-DD-yyyy')", + "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", + }, + ) + def test_semi_structured_types(self): self.validate_identity("SELECT CAST(a AS VARIANT)") self.validate_identity("SELECT CAST(a AS ARRAY)") @@ -1046,6 +1164,9 @@ WHERE self.validate_identity("CREATE SCHEMA mytestschema_clone CLONE testschema") self.validate_identity("CREATE TABLE IDENTIFIER('foo') (COLUMN1 VARCHAR, COLUMN2 VARCHAR)") self.validate_identity("CREATE TABLE IDENTIFIER($foo) (col1 VARCHAR, col2 VARCHAR)") + self.validate_identity( + "DROP function my_udf (OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN)))" + ) self.validate_identity( "CREATE TABLE orders_clone_restore CLONE orders AT (TIMESTAMP => TO_TIMESTAMP_TZ('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss'))" ) @@ -1061,6 +1182,17 @@ WHERE self.validate_identity( "CREATE OR REPLACE TABLE EXAMPLE_DB.DEMO.USERS (ID DECIMAL(38, 0) NOT NULL, PRIMARY KEY (ID), FOREIGN KEY (CITY_CODE) REFERENCES EXAMPLE_DB.DEMO.CITIES (CITY_CODE))" ) + self.validate_identity( + "CREATE ICEBERG TABLE my_iceberg_table (amount ARRAY(INT)) CATALOG='SNOWFLAKE' EXTERNAL_VOLUME='my_external_volume' BASE_LOCATION='my/relative/path/from/extvol'" + ) + self.validate_identity( + "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS $$ SELECT 'foo' $$", + "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL, val ARRAY(BOOLEAN))) RETURNS VARCHAR AS ' SELECT \\'foo\\' '", + ) + self.validate_identity( + "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE(col1 ARRAY(INT)) AS $$ WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t $$", + "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE (col1 ARRAY(INT)) AS ' WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t '", + ) self.validate_all( "CREATE TABLE orders_clone CLONE orders", @@ -1292,7 +1424,6 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene "spark": "SELECT `c0`, `c1` FROM (VALUES (1, 2), (3, 4)) AS `t0`(`c0`, `c1`)", }, ) - self.validate_all( """SELECT $1 AS "_1" FROM VALUES ('a'), ('b')""", write={ @@ -1300,6 +1431,18 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene "spark": """SELECT ${1} AS `_1` FROM VALUES ('a'), ('b')""", }, ) + self.validate_all( + "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x) AS t", + read={ + "duckdb": "SELECT * FROM (VALUES ({'a': 1})) AS t(x)", + }, + ) + self.validate_all( + "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x UNION ALL SELECT OBJECT_CONSTRUCT('a', 2)) AS t", + read={ + "duckdb": "SELECT * FROM (VALUES ({'a': 1}), ({'a': 2})) AS t(x)", + }, + ) def test_describe_table(self): self.validate_all( diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 1cf1ede..18f1fb7 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -1,6 +1,7 @@ from unittest import mock from sqlglot import exp, parse_one +from sqlglot.dialects.dialect import Dialects from tests.dialects.test_dialect import Validator @@ -245,12 +246,15 @@ TBLPROPERTIES ( self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)") self.validate_identity("REFRESH TABLE a.b.c") self.validate_identity("INTERVAL -86 DAYS") - self.validate_identity("SELECT UNIX_TIMESTAMP()") self.validate_identity("TRIM(' SparkSQL ')") self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") + self.validate_identity( + "SELECT UNIX_TIMESTAMP()", + "SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())", + ) self.validate_identity( "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 23 HOUR + 59 MINUTE + 59 SECONDS", "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + INTERVAL '59' MINUTE + INTERVAL '59' SECONDS", @@ -280,6 +284,18 @@ TBLPROPERTIES ( "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", ) + self.validate_all( + "SELECT SPLIT('123|789', '\\\\|')", + read={ + "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')", + "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", + }, + write={ + "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')", + "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", + "spark": "SELECT SPLIT('123|789', '\\\\|')", + }, + ) self.validate_all( "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", write={ @@ -366,7 +382,7 @@ TBLPROPERTIES ( "hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)", "redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", - "snowflake": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", + "snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", "spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", }, ) @@ -644,10 +660,10 @@ TBLPROPERTIES ( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a STRING, b STRING, c STRING) FROM person WHERE zip_code > 94511" ) self.validate_identity( - "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM person" + "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' FROM person" ) self.validate_identity( - "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') FROM person WHERE zip_code > 94511" + "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') FROM person WHERE zip_code > 94511" ) self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' FROM person WHERE zip_code > 94500" @@ -720,3 +736,16 @@ TBLPROPERTIES ( "presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))", }, ) + + def test_strip_modifiers(self): + without_modifiers = "SELECT * FROM t" + with_modifiers = f"{without_modifiers} CLUSTER BY y DISTRIBUTE BY x SORT BY z" + query = self.parse_one(with_modifiers) + + for dialect in Dialects: + with self.subTest(f"Transpiling query with CLUSTER/DISTRIBUTE/SORT BY to {dialect}"): + name = dialect.value + if name in ("", "databricks", "hive", "spark", "spark2"): + self.assertEqual(query.sql(name), with_modifiers) + else: + self.assertEqual(query.sql(name), without_modifiers) diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py index 2421987..f3cde0b 100644 --- a/tests/dialects/test_sqlite.py +++ b/tests/dialects/test_sqlite.py @@ -6,58 +6,6 @@ from sqlglot.helper import logger as helper_logger class TestSQLite(Validator): dialect = "sqlite" - def test_ddl(self): - self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)") - self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)") - self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)") - - self.validate_all( - """ - CREATE TABLE "Track" - ( - CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), - FOREIGN KEY ("AlbumId") REFERENCES "Album" ( - "AlbumId" - ) ON DELETE NO ACTION ON UPDATE NO ACTION, - FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, - FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT - ) - """, - write={ - "sqlite": """CREATE TABLE "Track" ( - CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), - FOREIGN KEY ("AlbumId") REFERENCES "Album" ( - "AlbumId" - ) ON DELETE NO ACTION ON UPDATE NO ACTION, - FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, - FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT -)""", - }, - pretty=True, - ) - self.validate_all( - "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", - read={ - "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", - }, - write={ - "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", - "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", - "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)", - }, - ) - self.validate_all( - """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""", - write={ - "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""", - "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)", - }, - ) - def test_sqlite(self): self.validate_identity("SELECT DATE()") self.validate_identity("SELECT DATE('now', 'start of month', '+1 month', '-1 day')") @@ -65,7 +13,6 @@ class TestSQLite(Validator): self.validate_identity("SELECT DATETIME(1092941466, 'auto')") self.validate_identity("SELECT DATETIME(1092941466, 'unixepoch', 'localtime')") self.validate_identity("SELECT UNIXEPOCH()") - self.validate_identity("SELECT STRFTIME('%s')") self.validate_identity("SELECT JULIANDAY('now') - JULIANDAY('1776-07-04')") self.validate_identity("SELECT UNIXEPOCH() - UNIXEPOCH('2004-01-01 02:34:56')") self.validate_identity("SELECT DATE('now', 'start of year', '+9 months', 'weekday 2')") @@ -145,6 +92,29 @@ class TestSQLite(Validator): write={"snowflake": "LEAST(x, y, z)"}, ) + def test_strftime(self): + self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')") + self.validate_identity("SELECT STRFTIME('%Y-%m-%d', '2016-10-16', 'start of month')") + self.validate_identity( + "SELECT STRFTIME('%s')", + "SELECT STRFTIME('%s', CURRENT_TIMESTAMP)", + ) + + self.validate_all( + "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')", + write={ + "duckdb": "SELECT STRFTIME(CAST('2020-01-01 12:05:03' AS TIMESTAMP), '%Y-%m-%d')", + "sqlite": "SELECT STRFTIME('%Y-%m-%d', '2020-01-01 12:05:03')", + }, + ) + self.validate_all( + "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)", + write={ + "duckdb": "SELECT STRFTIME(CAST(CURRENT_TIMESTAMP AS TIMESTAMP), '%Y-%m-%d')", + "sqlite": "SELECT STRFTIME('%Y-%m-%d', CURRENT_TIMESTAMP)", + }, + ) + def test_datediff(self): self.validate_all( "DATEDIFF(a, b, 'day')", @@ -190,3 +160,59 @@ class TestSQLite(Validator): ) self.assertIn("Named columns are not supported in table alias.", cm.output[0]) + + def test_ddl(self): + for conflict_action in ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"): + with self.subTest(f"ON CONFLICT {conflict_action}"): + self.validate_identity("CREATE TABLE a (b, c, UNIQUE (b, c) ON CONFLICT IGNORE)") + + self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)") + self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)") + self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)") + + self.validate_all( + """ + CREATE TABLE "Track" + ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album" ( + "AlbumId" + ) ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT + ) + """, + write={ + "sqlite": """CREATE TABLE "Track" ( + CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"), + FOREIGN KEY ("AlbumId") REFERENCES "Album" ( + "AlbumId" + ) ON DELETE NO ACTION ON UPDATE NO ACTION, + FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT, + FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT +)""", + }, + pretty=True, + ) + self.validate_all( + "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + read={ + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + }, + write={ + "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)", + "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)", + "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)", + }, + ) + self.validate_all( + """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""", + write={ + "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""", + "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)", + }, + ) diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py index f3894fd..a85ca8c 100644 --- a/tests/dialects/test_teradata.py +++ b/tests/dialects/test_teradata.py @@ -5,6 +5,7 @@ class TestTeradata(Validator): dialect = "teradata" def test_teradata(self): + self.validate_identity("TO_NUMBER(expr, fmt, nlsparam)") self.validate_identity("SELECT TOP 10 * FROM tbl") self.validate_identity("SELECT * FROM tbl SAMPLE 5") self.validate_identity( @@ -100,7 +101,9 @@ class TestTeradata(Validator): self.validate_identity( "CREATE VOLATILE SET TABLE example1 AS (SELECT col1, col2, col3 FROM table1) WITH DATA PRIMARY INDEX (col1) ON COMMIT PRESERVE ROWS" ) - + self.validate_identity( + "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)" + ) self.validate_all( """ CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL, diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index ed474fd..aefd857 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -272,6 +272,28 @@ class TestTSQL(Validator): "SELECT [x].[y] FROM foo", ) + self.validate_all( + "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS", + read={ + "postgres": "SELECT * FROM t OFFSET 2", + }, + write={ + "postgres": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST OFFSET 2", + "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS", + }, + ) + self.validate_all( + "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", + read={ + "duckdb": "SELECT * FROM t LIMIT 10 OFFSET 5", + "sqlite": "SELECT * FROM t LIMIT 5, 10", + "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", + }, + write={ + "duckdb": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST LIMIT 10 OFFSET 5", + "sqlite": "SELECT * FROM t ORDER BY (SELECT NULL) LIMIT 10 OFFSET 5", + }, + ) self.validate_all( "SELECT CAST([a].[b] AS SMALLINT) FROM foo", write={ @@ -720,6 +742,9 @@ class TestTSQL(Validator): ) def test_ddl(self): + for view_attr in ("ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"): + self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x") + expression = parse_one("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B", dialect="tsql") self.assertIsInstance(expression, exp.AlterTable) self.assertIsInstance(expression.args["actions"][0], exp.Drop) @@ -1549,7 +1574,7 @@ WHERE "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))", "redshift": "LAST_DAY(CAST(GETDATE() AS DATE))", - "snowflake": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", + "snowflake": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "tsql": "EOMONTH(CAST(GETDATE() AS DATE))", }, @@ -1564,7 +1589,7 @@ WHERE "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))", "redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", - "snowflake": "LAST_DAY(DATEADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))", + "snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))", "spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))", "tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", }, -- cgit v1.2.3