diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-09-03 07:37:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-09-03 07:37:44 +0000 |
commit | 09b58afa78a1784157d0f0a9db2e16531c9641e2 (patch) | |
tree | 1f87edad5142630006941a5effbab36b131e6416 /tests | |
parent | Adding upstream version 25.16.1. (diff) | |
download | sqlglot-09b58afa78a1784157d0f0a9db2e16531c9641e2.tar.xz sqlglot-09b58afa78a1784157d0f0a9db2e16531c9641e2.zip |
Adding upstream version 25.18.0.upstream/25.18.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/dialects/test_bigquery.py | 7 | ||||
-rw-r--r-- | tests/dialects/test_clickhouse.py | 10 | ||||
-rw-r--r-- | tests/dialects/test_dialect.py | 27 | ||||
-rw-r--r-- | tests/dialects/test_duckdb.py | 41 | ||||
-rw-r--r-- | tests/dialects/test_mysql.py | 71 | ||||
-rw-r--r-- | tests/dialects/test_oracle.py | 26 | ||||
-rw-r--r-- | tests/dialects/test_postgres.py | 27 | ||||
-rw-r--r-- | tests/dialects/test_presto.py | 10 | ||||
-rw-r--r-- | tests/dialects/test_snowflake.py | 12 | ||||
-rw-r--r-- | tests/dialects/test_spark.py | 26 | ||||
-rw-r--r-- | tests/dialects/test_sqlite.py | 4 | ||||
-rw-r--r-- | tests/dialects/test_starrocks.py | 22 | ||||
-rw-r--r-- | tests/dialects/test_tsql.py | 13 | ||||
-rw-r--r-- | tests/fixtures/optimizer/qualify_columns.sql | 4 | ||||
-rw-r--r-- | tests/fixtures/optimizer/qualify_tables.sql | 4 | ||||
-rw-r--r-- | tests/fixtures/optimizer/simplify.sql | 21 | ||||
-rw-r--r-- | tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 112 | ||||
-rw-r--r-- | tests/fixtures/pretty.sql | 16 | ||||
-rw-r--r-- | tests/test_generator.py | 8 | ||||
-rw-r--r-- | tests/test_optimizer.py | 10 | ||||
-rw-r--r-- | tests/test_time.py | 22 |
21 files changed, 385 insertions, 108 deletions
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 8c9f093..f6e8fe8 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -370,7 +370,7 @@ LANGUAGE js AS }, write={ "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", - "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", + "duckdb": "SELECT SUM(x) AS x", "postgres": "SELECT SUM(x) IGNORE NULLS AS x", "spark": "SELECT SUM(x) IGNORE NULLS AS x", "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", @@ -405,7 +405,7 @@ LANGUAGE js AS "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", write={ "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", - "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x", + "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x", "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x", }, ) @@ -413,7 +413,7 @@ LANGUAGE js AS "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", write={ "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", - "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x", + "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x", "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x", }, ) @@ -814,6 +814,7 @@ LANGUAGE js AS "presto": "SHA256(x)", "redshift": "SHA2(x, 256)", "trino": "SHA256(x)", + "duckdb": "SHA256(x)", }, ) self.validate_all( diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py index 53cd0a9..b4ba09e 100644 --- a/tests/dialects/test_clickhouse.py +++ b/tests/dialects/test_clickhouse.py @@ -492,6 +492,8 @@ class TestClickhouse(Validator): "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", }, ) + self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')") + self.validate_identity("SELECT TRIM(LEADING '(' FROM '( Hello, world! )')") def test_clickhouse_values(self): values = exp.select("*").from_( @@ -620,6 +622,14 @@ class TestClickhouse(Validator): ) self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + # Transpiled CREATE SCHEMA may have OnCluster property set + create_with_cluster = exp.Create( + this=db_table_expr, + kind="SCHEMA", + properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), + ) + self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") + ctas_with_comment = exp.Create( this=exp.table_("foo"), kind="TABLE", diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 8f3c183..190d044 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -656,13 +656,29 @@ class TestDialect(Validator): }, ) self.validate_all( + "TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')", + write={ + "mysql": "CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))", + "trino": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP(6))", + "presto": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP)", + }, + ) + self.validate_all( + "TIME_STR_TO_TIME('2020-01-01 12:13:14.123-08:00', 'America/Los_Angeles')", + write={ + "mysql": "TIMESTAMP('2020-01-01 12:13:14.123-08:00')", + "trino": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP(3) WITH TIME ZONE)", + "presto": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP WITH TIME ZONE)", + }, + ) + self.validate_all( "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')", write={ "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", - "mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", + "mysql": "TIMESTAMP('2020-01-01 12:13:14-08:00')", "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", @@ -683,7 +699,7 @@ class TestDialect(Validator): "databricks": "CAST(col AS TIMESTAMP)", "duckdb": "CAST(col AS TIMESTAMPTZ)", "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", - "mysql": "CAST(col AS DATETIME)", + "mysql": "TIMESTAMP(col)", "postgres": "CAST(col AS TIMESTAMPTZ)", "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)", "snowflake": "CAST(col AS TIMESTAMPTZ)", @@ -723,6 +739,13 @@ class TestDialect(Validator): }, ) self.validate_all( + "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", + write={ + "redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')", + "tsql": "FORMAT(a, 'yyyy-MM-dd HH:mm:ss.ffffff')", + }, + ) + self.validate_all( "TIME_TO_TIME_STR(x)", write={ "drill": "CAST(x AS VARCHAR)", diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 3fb9930..5d2d044 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -231,10 +231,6 @@ class TestDuckDB(Validator): }, ) - self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y") - self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x") - self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)") - # https://github.com/duckdb/duckdb/releases/tag/v0.8.0 self.assertEqual( parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b" @@ -243,6 +239,10 @@ class TestDuckDB(Validator): parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" ) + self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y") + self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x") + self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)") + self.validate_identity("SELECT * FROM GLOB(x)") self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])") self.validate_identity("SELECT MAP {'x': 1}") self.validate_identity("SELECT (MAP {'x': 1})['x']") @@ -278,6 +278,15 @@ class TestDuckDB(Validator): self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize) self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)") self.validate_identity( + "SELECT species, island, COUNT(*) FROM t GROUP BY GROUPING SETS (species), GROUPING SETS (island)" + ) + self.validate_identity( + "SELECT species, island, COUNT(*) FROM t GROUP BY CUBE (species), CUBE (island)" + ) + self.validate_identity( + "SELECT species, island, COUNT(*) FROM t GROUP BY ROLLUP (species), ROLLUP (island)" + ) + self.validate_identity( "SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'" ).assert_is(exp.Summarize) self.validate_identity( @@ -999,6 +1008,13 @@ class TestDuckDB(Validator): "duckdb": "SELECT * FROM example TABLESAMPLE RESERVOIR (3 ROWS) REPEATABLE (82)", }, ) + self.validate_all( + "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE (2 ROWS)", + write={ + "duckdb": "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE RESERVOIR (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE RESERVOIR (2 ROWS)", + "spark": "SELECT * FROM (SELECT * FROM t) TABLESAMPLE (1 ROWS) AS t1, (SELECT * FROM t) TABLESAMPLE (2 ROWS) AS t2", + }, + ) def test_array(self): self.validate_identity("ARRAY(SELECT id FROM t)") @@ -1256,3 +1272,20 @@ class TestDuckDB(Validator): read={"bigquery": "SELECT @foo"}, write={"bigquery": "SELECT @foo", "duckdb": "SELECT $foo"}, ) + + def test_ignore_nulls(self): + # Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM) + from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS + + agg_funcs = (exp.Sum, exp.Max, exp.Min) + + for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs: + func = func_type(this=exp.to_identifier("col")) + ignore_null = exp.IgnoreNulls(this=func) + windowed_ignore_null = exp.Window(this=ignore_null) + + if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS: + self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb")) + else: + self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb")) + self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb")) diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py index c598cb5..45b79bf 100644 --- a/tests/dialects/test_mysql.py +++ b/tests/dialects/test_mysql.py @@ -1,3 +1,6 @@ +import unittest +import sys + from sqlglot import expressions as exp from sqlglot.dialects.mysql import MySQL from tests.dialects.test_dialect import Validator @@ -637,6 +640,53 @@ class TestMySQL(Validator): }, ) + # No timezone, make sure DATETIME captures the correct precision + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123456+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.123456+00:00' AS DATETIME(6))", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.123+00:00' AS DATETIME(3))", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15+00:00' AS DATETIME)", + ) + + # With timezone, make sure the TIMESTAMP constructor is used + # also TIMESTAMP doesnt have the subsecond precision truncation issue that DATETIME does so we dont need to TIMESTAMP(6) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')", + write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')", + write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')", + ) + + @unittest.skipUnless( + sys.version_info >= (3, 11), + "Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds", + ) + def test_mysql_time_python311(self): + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12345+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.12345+00:00' AS DATETIME(6))", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1234+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.1234+00:00' AS DATETIME(6))", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.12+00:00' AS DATETIME(3))", + ) + self.validate_identity( + "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1+00:00')", + write_sql="SELECT CAST('2023-01-01 13:14:15.1+00:00' AS DATETIME(3))", + ) + def test_mysql(self): self.validate_all( "SELECT CONCAT('11', '22')", @@ -1192,3 +1242,24 @@ COMMENT='客户账户表'""" "mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})", }, ) + + def test_at_time_zone(self): + with self.assertLogs() as cm: + # Check AT TIME ZONE doesnt discard the column name and also raises a warning + self.validate_identity( + "SELECT foo AT TIME ZONE 'UTC'", + write_sql="SELECT foo", + ) + assert "AT TIME ZONE is not supported" in cm.output[0] + + def test_json_value(self): + json_doc = """'{"item": "shoes", "price": "49.95"}'""" + self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, '$.price')""") + self.validate_identity( + f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2))""" + ) + + for on_option in ("NULL", "ERROR", "DEFAULT 1"): + self.validate_identity( + f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price""" + ) diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py index 4813a49..77f46e4 100644 --- a/tests/dialects/test_oracle.py +++ b/tests/dialects/test_oracle.py @@ -251,7 +251,6 @@ class TestOracle(Validator): """SELECT * FROM t ORDER BY a ASC NULLS LAST, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC NULLS FIRST""", """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""", ) - self.validate_all( "NVL(NULL, 1)", write={ @@ -260,6 +259,29 @@ class TestOracle(Validator): "clickhouse": "COALESCE(NULL, 1)", }, ) + self.validate_all( + "LTRIM('Hello World', 'H')", + write={ + "": "LTRIM('Hello World', 'H')", + "oracle": "LTRIM('Hello World', 'H')", + "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')", + }, + ) + self.validate_all( + "RTRIM('Hello World', 'd')", + write={ + "": "RTRIM('Hello World', 'd')", + "oracle": "RTRIM('Hello World', 'd')", + "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')", + }, + ) + self.validate_all( + "TRIM(BOTH 'h' FROM 'Hello World')", + write={ + "oracle": "TRIM(BOTH 'h' FROM 'Hello World')", + "clickhouse": "TRIM(BOTH 'h' FROM 'Hello World')", + }, + ) def test_join_marker(self): self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y") @@ -347,7 +369,7 @@ FROM warehouses, XMLTABLE( FROM XMLTABLE( 'ROWSET/ROW' PASSING - dbms_xmlgen.GETXMLTYPE('SELECT table_name, column_name, data_default FROM user_tab_columns') + dbms_xmlgen.getxmltype('SELECT table_name, column_name, data_default FROM user_tab_columns') COLUMNS table_name VARCHAR2(128) PATH '*[1]', column_name VARCHAR2(128) PATH '*[2]', diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index 075c274..c628db4 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -676,17 +676,25 @@ class TestPostgres(Validator): }, ) self.validate_all( - """'{"a":1,"b":2}'::json->'b'""", + "TRIM(BOTH 'as' FROM 'as string as')", write={ - "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""", - "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""", + "postgres": "TRIM(BOTH 'as' FROM 'as string as')", + "spark": "TRIM(BOTH 'as' FROM 'as string as')", }, ) + self.validate_identity( + """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")""", + """SELECT LTRIM(' XXX ' COLLATE "de_DE")""", + ) + self.validate_identity( + """SELECT TRIM(TRAILING ' XXX ' COLLATE "de_DE")""", + """SELECT RTRIM(' XXX ' COLLATE "de_DE")""", + ) self.validate_all( - "TRIM(BOTH 'as' FROM 'as string as')", + """'{"a":1,"b":2}'::json->'b'""", write={ - "postgres": "TRIM(BOTH 'as' FROM 'as string as')", - "spark": "TRIM(BOTH 'as' FROM 'as string as')", + "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""", + "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""", }, ) self.validate_all( @@ -780,6 +788,13 @@ class TestPostgres(Validator): }, ) + self.validate_identity( + 'SELECT js, js IS JSON AS "json?", js IS JSON VALUE AS "scalar?", js IS JSON SCALAR AS "scalar?", js IS JSON OBJECT AS "object?", js IS JSON ARRAY AS "array?" FROM t' + ) + self.validate_identity( + 'SELECT js, js IS JSON ARRAY WITH UNIQUE KEYS AS "array w. UK?", js IS JSON ARRAY WITHOUT UNIQUE KEYS AS "array w/o UK?", js IS JSON ARRAY UNIQUE KEYS AS "array w UK 2?" FROM t' + ) + def test_ddl(self): # Checks that user-defined types are parsed into DataType instead of Identifier self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is( diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py index 950c89f..c8e616e 100644 --- a/tests/dialects/test_presto.py +++ b/tests/dialects/test_presto.py @@ -634,6 +634,7 @@ class TestPresto(Validator): }, ) + self.validate_identity("SELECT a FROM t GROUP BY a, ROLLUP (b), ROLLUP (c), ROLLUP (d)") self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)") self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)") self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')") @@ -715,9 +716,6 @@ class TestPresto(Validator): ) self.validate_all( "SELECT ROW(1, 2)", - read={ - "spark": "SELECT STRUCT(1, 2)", - }, write={ "presto": "SELECT ROW(1, 2)", "spark": "SELECT STRUCT(1, 2)", @@ -836,12 +834,6 @@ class TestPresto(Validator): }, ) self.validate_all( - "SELECT a FROM t GROUP BY a, ROLLUP(b), ROLLUP(c), ROLLUP(d)", - write={ - "presto": "SELECT a FROM t GROUP BY a, ROLLUP (b, c, d)", - }, - ) - self.validate_all( 'SELECT a."b" FROM "foo"', write={ "duckdb": 'SELECT a."b" FROM "foo"', diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index d01101e..7837cc9 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -598,12 +598,12 @@ WHERE self.validate_all( "DIV0(foo, bar)", write={ - "snowflake": "IFF(bar = 0, 0, foo / bar)", - "sqlite": "IIF(bar = 0, 0, CAST(foo AS REAL) / bar)", - "presto": "IF(bar = 0, 0, CAST(foo AS DOUBLE) / bar)", - "spark": "IF(bar = 0, 0, foo / bar)", - "hive": "IF(bar = 0, 0, foo / bar)", - "duckdb": "CASE WHEN bar = 0 THEN 0 ELSE foo / bar END", + "snowflake": "IFF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", + "sqlite": "IIF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS REAL) / bar)", + "presto": "IF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS DOUBLE) / bar)", + "spark": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", + "hive": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", + "duckdb": "CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END", }, ) self.validate_all( diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index a2d87df..cbaa169 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -10,6 +10,7 @@ class TestSpark(Validator): dialect = "spark" def test_ddl(self): + self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2") self.validate_identity("CREATE TABLE foo AS WITH t AS (SELECT 1 AS col) SELECT col FROM t") self.validate_identity("CREATE TEMPORARY VIEW test AS SELECT 1") self.validate_identity("CREATE TABLE foo (col VARCHAR(50))") @@ -484,7 +485,7 @@ TBLPROPERTIES ( ) self.validate_all( "SELECT CAST(STRUCT('fooo') AS STRUCT<a: VARCHAR(2)>)", - write={"spark": "SELECT CAST(STRUCT('fooo') AS STRUCT<a: STRING>)"}, + write={"spark": "SELECT CAST(STRUCT('fooo' AS col1) AS STRUCT<a: STRING>)"}, ) self.validate_all( "SELECT CAST(123456 AS VARCHAR(3))", @@ -710,6 +711,29 @@ TBLPROPERTIES ( ) self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')") + self.validate_all( + "SELECT ANY_VALUE(col, true), FIRST(col, true), FIRST_VALUE(col, true) OVER ()", + write={ + "duckdb": "SELECT ANY_VALUE(col), FIRST(col), FIRST_VALUE(col IGNORE NULLS) OVER ()" + }, + ) + + self.validate_all( + "SELECT STRUCT(1, 2)", + write={ + "spark": "SELECT STRUCT(1 AS col1, 2 AS col2)", + "presto": "SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))", + "duckdb": "SELECT {'col1': 1, 'col2': 2}", + }, + ) + self.validate_all( + "SELECT STRUCT(x, 1, y AS col3, STRUCT(5)) FROM t", + write={ + "spark": "SELECT STRUCT(x AS x, 1 AS col2, y AS col3, STRUCT(5 AS col1) AS col4) FROM t", + "duckdb": "SELECT {'x': x, 'col2': 1, 'col3': y, 'col4': {'col1': 5}} FROM t", + }, + ) + def test_bool_or(self): self.validate_all( "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a", diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py index 46bbadc..f2c9802 100644 --- a/tests/dialects/test_sqlite.py +++ b/tests/dialects/test_sqlite.py @@ -91,6 +91,10 @@ class TestSQLite(Validator): read={"snowflake": "LEAST(x, y, z)"}, write={"snowflake": "LEAST(x, y, z)"}, ) + self.validate_identity( + "SELECT * FROM station WHERE city IS NOT ''", + "SELECT * FROM station WHERE NOT city IS ''", + ) def test_strftime(self): self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')") diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py index 67aabb9..fa9a2cc 100644 --- a/tests/dialects/test_starrocks.py +++ b/tests/dialects/test_starrocks.py @@ -7,6 +7,7 @@ class TestStarrocks(Validator): def test_identity(self): self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") self.validate_identity("SELECT APPROX_COUNT_DISTINCT(a) FROM x") + self.validate_identity("SELECT [1, 2, 3]") def test_time(self): self.validate_identity("TIMESTAMP('2022-01-01')") @@ -28,3 +29,24 @@ class TestStarrocks(Validator): "mysql": "SELECT REGEXP_LIKE(abc, '%foo%')", }, ) + + def test_unnest(self): + self.validate_identity( + "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t", + "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t(unnest)", + ) + + lateral_explode_sqls = [ + "SELECT id, t.col FROM tbl, UNNEST(scores) AS t(col)", + "SELECT id, t.col FROM tbl CROSS JOIN LATERAL UNNEST(scores) AS t(col)", + ] + + for sql in lateral_explode_sqls: + with self.subTest(f"Testing Starrocks roundtrip & transpilation of: {sql}"): + self.validate_all( + sql, + write={ + "starrocks": sql, + "spark": "SELECT id, t.col FROM tbl LATERAL VIEW EXPLODE(scores) t AS col", + }, + ) diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py index ad18981..ecb83da 100644 --- a/tests/dialects/test_tsql.py +++ b/tests/dialects/test_tsql.py @@ -32,6 +32,9 @@ class TestTSQL(Validator): self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0") self.validate_identity("TRUNCATE TABLE t1 WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))") self.validate_identity( + "SELECT TOP 10 s.RECORDID, n.c.value('(/*:FORM_ROOT/*:SOME_TAG)[1]', 'float') AS SOME_TAG_VALUE FROM source_table.dbo.source_data AS s(nolock) CROSS APPLY FormContent.nodes('/*:FORM_ROOT') AS N(C)" + ) + self.validate_identity( "CREATE CLUSTERED INDEX [IX_OfficeTagDetail_TagDetailID] ON [dbo].[OfficeTagDetail]([TagDetailID] ASC)" ) self.validate_identity( @@ -412,6 +415,12 @@ class TestTSQL(Validator): }, ) + # Check that TRUE and FALSE dont get expanded to (1=1) or (1=0) when used in a VALUES expression + self.validate_identity( + "SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)", + write_sql="SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)", + ) + def test_option(self): possible_options = [ "HASH GROUP", @@ -1577,8 +1586,8 @@ WHERE self.validate_all( "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)", write={ - "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)", - "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)", + "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z)", + "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x) AS y(z)", }, ) diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 7d58908..68c0caa 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -632,6 +632,7 @@ SELECT x.a + 1 AS i, x.a + 1 + 1 AS j, x.a + 1 + 1 + 1 AS k FROM x AS x; # title: noop - reference comes before alias # execute: false +# validate_qualify_columns: false SELECT i + 1 AS j, x.a + 1 AS i FROM x; SELECT i + 1 AS j, x.a + 1 AS i FROM x AS x; @@ -704,3 +705,6 @@ SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b); SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b); + +SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS); +SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS); diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 4b7d33d..61d0b96 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -180,3 +180,7 @@ SELECT a.id, a_2.id FROM cat.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id; COPY INTO (SELECT * FROM x) TO 'data' WITH (FORMAT 'CSV'); COPY INTO (SELECT * FROM c.db.x AS x) TO 'data' WITH (FORMAT 'CSV'); + +# title: tablesample +SELECT 1 FROM x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN y TABLESAMPLE SYSTEM (10 PERCENT); +SELECT 1 FROM c.db.x AS x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN c.db.y AS y TABLESAMPLE SYSTEM (10 PERCENT); diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql index 6035ee6..ab8f963 100644 --- a/tests/fixtures/optimizer/simplify.sql +++ b/tests/fixtures/optimizer/simplify.sql @@ -85,6 +85,27 @@ NULL; NULL = NULL; NULL; +1 AND 0; +FALSE; + +0 AND 1; +FALSE; + +0 OR 1; +TRUE; + +0 OR NULL; +NULL; + +NULL OR 0; +NULL; + +0 AND NULL; +FALSE; + +NULL AND 0; +FALSE; + -- Can't optimize this because different engines do different things -- mysql converts to 0 and 1 but tsql does true and false NULL <=> NULL; diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 5b004fa..7bbeb27 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -852,10 +852,10 @@ SELECT SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY -ROLLUP ( - "x"."channel", - "x"."id" -) + ROLLUP ( + "x"."channel", + "x"."id" + ) ORDER BY "channel", "id" @@ -2294,12 +2294,12 @@ SELECT SUM("y"."number_sales") AS "_col_5" FROM "y" AS "y" GROUP BY -ROLLUP ( - "y"."channel", - "y"."i_brand_id", - "y"."i_class_id", - "y"."i_category_id" -) + ROLLUP ( + "y"."channel", + "y"."i_brand_id", + "y"."i_class_id", + "y"."i_category_id" + ) ORDER BY "channel", "i_brand_id", @@ -2608,12 +2608,12 @@ JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" AND "customer_address"."ca_state" IN ('KS', 'IA', 'AL', 'UT', 'VA', 'NC', 'TX') GROUP BY -ROLLUP ( - "item"."i_item_id", - "customer_address"."ca_country", - "customer_address"."ca_state", - "customer_address"."ca_county" -) + ROLLUP ( + "item"."i_item_id", + "customer_address"."ca_country", + "customer_address"."ca_state", + "customer_address"."ca_county" + ) ORDER BY "ca_country", "ca_state", @@ -2876,12 +2876,12 @@ JOIN "item" AS "item" JOIN "warehouse" AS "warehouse" ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY -ROLLUP ( - "item"."i_product_name", - "item"."i_brand", - "item"."i_class", - "item"."i_category" -) + ROLLUP ( + "item"."i_product_name", + "item"."i_brand", + "item"."i_class", + "item"."i_category" + ) ORDER BY "qoh", "i_product_name", @@ -3347,10 +3347,10 @@ JOIN "store" AS "store" ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY -ROLLUP ( - "item"."i_item_id", - "store"."s_state" -) + ROLLUP ( + "item"."i_item_id", + "store"."s_state" + ) ORDER BY "i_item_id", "s_state" @@ -4463,10 +4463,10 @@ JOIN "store" AS "store" ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY -ROLLUP ( - "item"."i_category", - "item"."i_class" -) + ROLLUP ( + "item"."i_category", + "item"."i_class" + ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "i_category" END, @@ -9017,16 +9017,16 @@ WITH "dw1" AS ( JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY - ROLLUP ( - "item"."i_category", - "item"."i_class", - "item"."i_brand", - "item"."i_product_name", - "date_dim"."d_year", - "date_dim"."d_qoy", - "date_dim"."d_moy", - "store"."s_store_id" - ) + ROLLUP ( + "item"."i_category", + "item"."i_class", + "item"."i_brand", + "item"."i_product_name", + "date_dim"."d_year", + "date_dim"."d_qoy", + "date_dim"."d_moy", + "store"."s_store_id" + ) ), "dw2" AS ( SELECT "dw1"."i_category" AS "i_category", @@ -9396,10 +9396,10 @@ LEFT JOIN "_u_0" AS "_u_0" WHERE NOT "_u_0"."s_state" IS NULL GROUP BY -ROLLUP ( - "store"."s_state", - "store"."s_county" -) + ROLLUP ( + "store"."s_state", + "store"."s_county" + ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "s_state" END, @@ -10433,10 +10433,10 @@ SELECT SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY -ROLLUP ( - "x"."channel", - "x"."id" -) + ROLLUP ( + "x"."channel", + "x"."id" + ) ORDER BY "channel", "id" @@ -10937,10 +10937,10 @@ SELECT SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY -ROLLUP ( - "x"."channel", - "x"."id" -) + ROLLUP ( + "x"."channel", + "x"."id" + ) ORDER BY "channel", "id" @@ -11539,10 +11539,10 @@ JOIN "date_dim" AS "d1" JOIN "item" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" GROUP BY -ROLLUP ( - "item"."i_category", - "item"."i_class" -) + ROLLUP ( + "item"."i_category", + "item"."i_class" + ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "i_category" END, diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql index d4eb3b9..8e10517 100644 --- a/tests/fixtures/pretty.sql +++ b/tests/fixtures/pretty.sql @@ -99,14 +99,14 @@ WITH cte1 AS ( FOO(CASE WHEN a AND b THEN c AND d ELSE 3 END) GROUP BY x, - GROUPING SETS ( - a, - (b, c) - ), - CUBE ( - y, - z - ) + GROUPING SETS ( + a, + (b, c) + ), + CUBE ( + y, + z + ) ) AS x ) SELECT diff --git a/tests/test_generator.py b/tests/test_generator.py index e21aa25..7609831 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -1,4 +1,3 @@ -import time import unittest from sqlglot import exp, parse_one @@ -46,8 +45,5 @@ class TestGenerator(unittest.TestCase): assert parse_one("X as 1").sql(identify="safe") == 'X AS "1"' def test_generate_nested_binary(self): - sql = "SELECT 'foo'" + (" || 'foo'" * 500) - - now = time.time() - self.assertEqual(parse_one(sql).sql(), sql) - self.assertLessEqual(time.time() - now, 0.1) + sql = "SELECT 'foo'" + (" || 'foo'" * 1000) + self.assertEqual(parse_one(sql).sql(copy=False), sql) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index c746a78..d6e11a9 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -27,11 +27,11 @@ def parse_and_optimize(func, sql, read_dialect, **kwargs): return func(parse_one(sql, read=read_dialect), **kwargs) -def qualify_columns(expression, **kwargs): +def qualify_columns(expression, validate_qualify_columns=True, **kwargs): expression = optimizer.qualify.qualify( expression, infer_schema=True, - validate_qualify_columns=False, + validate_qualify_columns=validate_qualify_columns, identify=False, **kwargs, ) @@ -135,11 +135,17 @@ class TestOptimizer(unittest.TestCase): continue dialect = meta.get("dialect") leave_tables_isolated = meta.get("leave_tables_isolated") + validate_qualify_columns = meta.get("validate_qualify_columns") func_kwargs = {**kwargs} if leave_tables_isolated is not None: func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated) + if validate_qualify_columns is not None: + func_kwargs["validate_qualify_columns"] = string_to_bool( + validate_qualify_columns + ) + if set_dialect and dialect: func_kwargs["dialect"] = dialect diff --git a/tests/test_time.py b/tests/test_time.py index bd0e63f..62d4361 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -1,6 +1,7 @@ import unittest +import sys -from sqlglot.time import format_time +from sqlglot.time import format_time, subsecond_precision class TestTime(unittest.TestCase): @@ -12,3 +13,22 @@ class TestTime(unittest.TestCase): self.assertEqual(format_time("aa", mapping), "c") self.assertEqual(format_time("aaada", mapping), "cbdb") self.assertEqual(format_time("da", mapping), "db") + + def test_subsecond_precision(self): + self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456+00:00")) + self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.123+00:00")) + self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14+00:00")) + self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14")) + self.assertEqual(0, subsecond_precision("garbage")) + + @unittest.skipUnless( + sys.version_info >= (3, 11), + "Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds", + ) + def test_subsecond_precision_python311(self): + # ref: https://docs.python.org/3/whatsnew/3.11.html#datetime + self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456789+00:00")) + self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.12345+00:00")) + self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.1234+00:00")) + self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.12+00:00")) + self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.1+00:00")) |