author    Daniel Baumann <daniel.baumann@progress-linux.org>  2025-01-27 16:31:23 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2025-01-27 16:31:23 +0000
commit    886e522786b50d8cb126baacbd1eaf92861832ec (patch)
tree      6d9736731779fe0f981e8a3692c9eeb37e40fbf3 /tests
parent    Releasing debian version 26.2.1-1. (diff)
Merging upstream version 26.3.8.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
 tests/dialects/test_athena.py                          |  13
 tests/dialects/test_bigquery.py                        |  42
 tests/dialects/test_clickhouse.py                      |  12
 tests/dialects/test_dialect.py                         | 206
 tests/dialects/test_duckdb.py                          |   7
 tests/dialects/test_hive.py                            |   5
 tests/dialects/test_oracle.py                          |   9
 tests/dialects/test_postgres.py                        |   9
 tests/dialects/test_presto.py                          |  16
 tests/dialects/test_snowflake.py                       |  49
 tests/dialects/test_sqlite.py                          |   1
 tests/dialects/test_trino.py                           |   7
 tests/dialects/test_tsql.py                            |  10
 tests/fixtures/optimizer/qualify_columns.sql           |  50
 tests/fixtures/optimizer/qualify_columns__invalid.sql  |   1
 tests/fixtures/optimizer/simplify.sql                  |   3
 tests/test_transpile.py                                |   2
 17 files changed, 372 insertions(+), 70 deletions(-)
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py
index ef96938..c5ba4cc 100644
--- a/tests/dialects/test_athena.py
+++ b/tests/dialects/test_athena.py
@@ -198,7 +198,8 @@ class TestAthena(Validator):
     def test_ctas(self):
         # Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location
-        # The 'table_type' property is used to determine if it's a Hive or an Iceberg table
+        # In addition, Hive tables used 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields
+        # The 'table_type' property is used to determine if it's a Hive or an Iceberg table. If it's omitted, it defaults to Hive
         # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
         ctas_hive = exp.Create(
             this=exp.to_table("foo.bar"),
@@ -207,13 +208,16 @@ class TestAthena(Validator):
                 expressions=[
                     exp.FileFormatProperty(this=exp.Literal.string("parquet")),
                     exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+                    exp.PartitionedByProperty(
+                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
+                    ),
                 ]
             ),
             expression=exp.select("1"),
         )
         self.assertEqual(
             ctas_hive.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo') AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
         )

         ctas_iceberg = exp.Create(
@@ -223,11 +227,14 @@ class TestAthena(Validator):
                 expressions=[
                     exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
                     exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+                    exp.PartitionedByProperty(
+                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
+                    ),
                 ]
             ),
             expression=exp.select("1"),
         )
         self.assertEqual(
             ctas_iceberg.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo') AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
         )
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index dbe4401..15372fc 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -1572,14 +1572,30 @@ WHERE
             "SAFE_DIVIDE(x, y)",
             write={
                 "bigquery": "SAFE_DIVIDE(x, y)",
-                "duckdb": "IF((y) <> 0, (x) / (y), NULL)",
-                "presto": "IF((y) <> 0, (x) / (y), NULL)",
-                "trino": "IF((y) <> 0, (x) / (y), NULL)",
-                "hive": "IF((y) <> 0, (x) / (y), NULL)",
-                "spark2": "IF((y) <> 0, (x) / (y), NULL)",
-                "spark": "IF((y) <> 0, (x) / (y), NULL)",
-                "databricks": "IF((y) <> 0, (x) / (y), NULL)",
-                "snowflake": "IFF((y) <> 0, (x) / (y), NULL)",
+                "duckdb": "CASE WHEN y <> 0 THEN x / y ELSE NULL END",
+                "presto": "IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)",
+                "trino": "IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)",
+                "hive": "IF(y <> 0, x / y, NULL)",
+                "spark2": "IF(y <> 0, x / y, NULL)",
+                "spark": "IF(y <> 0, x / y, NULL)",
+                "databricks": "IF(y <> 0, x / y, NULL)",
+                "snowflake": "IFF(y <> 0, x / y, NULL)",
+                "postgres": "CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END",
+            },
+        )
+        self.validate_all(
+            "SAFE_DIVIDE(x + 1, 2 * y)",
+            write={
+                "bigquery": "SAFE_DIVIDE(x + 1, 2 * y)",
+                "duckdb": "CASE WHEN (2 * y) <> 0 THEN (x + 1) / (2 * y) ELSE NULL END",
+                "presto": "IF((2 * y) <> 0, CAST((x + 1) AS DOUBLE) / (2 * y), NULL)",
+                "trino": "IF((2 * y) <> 0, CAST((x + 1) AS DOUBLE) / (2 * y), NULL)",
+                "hive": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)",
+                "spark2": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)",
+                "spark": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)",
+                "databricks": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)",
+                "snowflake": "IFF((2 * y) <> 0, (x + 1) / (2 * y), NULL)",
+                "postgres": "CASE WHEN (2 * y) <> 0 THEN CAST((x + 1) AS DOUBLE PRECISION) / (2 * y) ELSE NULL END",
             },
         )
         self.validate_all(
@@ -1591,11 +1607,11 @@ WHERE
             },
         )
         self.validate_all(
-            "SELECT STRPOS('foo@example.com', '@')",
+            "SELECT INSTR('foo@example.com', '@')",
             write={
-                "bigquery": "SELECT STRPOS('foo@example.com', '@')",
+                "bigquery": "SELECT INSTR('foo@example.com', '@')",
                 "duckdb": "SELECT STRPOS('foo@example.com', '@')",
-                "snowflake": "SELECT POSITION('@', 'foo@example.com')",
+                "snowflake": "SELECT CHARINDEX('@', 'foo@example.com')",
             },
         )
         self.validate_all(
@@ -2239,8 +2255,8 @@ OPTIONS (
             r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')",
         )
         self.validate_identity(
-            r"REGEXP_SUBSTR(value, pattern, position, occurence)",
-            r"REGEXP_EXTRACT(value, pattern, position, occurence)",
+            r"REGEXP_SUBSTR(value, pattern, position, occurrence)",
+            r"REGEXP_EXTRACT(value, pattern, position, occurrence)",
         )
         self.validate_all(
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index 3bff6e2..a4ee23c 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -85,8 +85,8 @@ class TestClickhouse(Validator):
         self.validate_identity("SELECT exponentialTimeDecayedAvg(60)(a, b)")
         self.validate_identity("levenshteinDistance(col1, col2)", "editDistance(col1, col2)")
         self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)")
-        self.validate_identity("position(haystack, needle)")
-        self.validate_identity("position(haystack, needle, position)")
+        self.validate_identity("POSITION(haystack, needle)")
+        self.validate_identity("POSITION(haystack, needle, position)")
         self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)")
         self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)")
         self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
@@ -398,9 +398,8 @@ class TestClickhouse(Validator):
                 "clickhouse": "SELECT quantileIf(0.5)(a, TRUE)",
             },
         )
-        self.validate_all(
-            "SELECT position(needle IN haystack)",
-            write={"clickhouse": "SELECT position(haystack, needle)"},
+        self.validate_identity(
+            "SELECT POSITION(needle IN haystack)", "SELECT POSITION(haystack, needle)"
         )
         self.validate_identity(
             "SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result = 'break'"
@@ -742,6 +741,9 @@ class TestClickhouse(Validator):
             "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
             "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
         )
+        self.validate_identity(
+            'CREATE TABLE t1 ("x" UInt32, "y" Dynamic, "z" Dynamic(max_types = 10)) ENGINE=MergeTree ORDER BY x'
+        )

         self.validate_all(
             "CREATE DATABASE x",
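The test_bigquery.py hunks above change how SAFE_DIVIDE is lowered: bare operands are no longer wrapped in redundant parentheses, and integer-division dialects get an explicit cast. A minimal sketch to reproduce this with sqlglot's public transpile API (outputs assume the 26.3.8 behavior merged here):

    import sqlglot

    # BigQuery's SAFE_DIVIDE returns NULL when the divisor is zero.
    print(sqlglot.transpile("SAFE_DIVIDE(x, y)", read="bigquery", write="duckdb")[0])
    # CASE WHEN y <> 0 THEN x / y ELSE NULL END

    # Presto performs integer division on integer operands, hence the cast.
    print(sqlglot.transpile("SAFE_DIVIDE(x, y)", read="bigquery", write="presto")[0])
    # IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)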
"mysql": "TIMESTAMP(a)", }, ) - self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS NUMBER)"}) - self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS NUMBER)"}) - self.validate_all("CAST(a AS BIGINT)", write={"oracle": "CAST(a AS NUMBER)"}) - self.validate_all("CAST(a AS INT)", write={"oracle": "CAST(a AS NUMBER)"}) + self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS SMALLINT)"}) + self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS SMALLINT)"}) + self.validate_all("CAST(a AS BIGINT)", write={"oracle": "CAST(a AS INT)"}) + self.validate_all("CAST(a AS INT)", write={"oracle": "CAST(a AS INT)"}) self.validate_all( "CAST(a AS DECIMAL)", read={"oracle": "CAST(a AS NUMBER)"}, @@ -1682,42 +1682,202 @@ class TestDialect(Validator): }, ) self.validate_all( - "POSITION(needle IN haystack)", - write={ + "STR_POSITION(haystack, needle)", + read={ + "athena": "POSITION(needle in haystack)", + "clickhouse": "POSITION(needle in haystack)", + "databricks": "POSITION(needle in haystack)", + "drill": "POSITION(needle in haystack)", + "duckdb": "POSITION(needle in haystack)", + "materialize": "POSITION(needle in haystack)", + "mysql": "POSITION(needle in haystack)", + "postgres": "POSITION(needle in haystack)", + "presto": "POSITION(needle in haystack)", + "redshift": "POSITION(needle in haystack)", + "risingwave": "POSITION(needle in haystack)", + "snowflake": "POSITION(needle in haystack)", + "spark": "POSITION(needle in haystack)", + "spark2": "POSITION(needle in haystack)", + "teradata": "POSITION(needle in haystack)", + "trino": "POSITION(needle in haystack)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle)", + read={ + "clickhouse": "POSITION(haystack, needle)", + "databricks": "POSITION(needle, haystack)", + "snowflake": "POSITION(needle, haystack)", + "spark2": "POSITION(needle, haystack)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle)", + read={ + "athena": "STRPOS(haystack, needle)", + "bigquery": "STRPOS(haystack, needle)", "drill": "STRPOS(haystack, needle)", "duckdb": "STRPOS(haystack, needle)", "postgres": "STRPOS(haystack, needle)", "presto": "STRPOS(haystack, needle)", - "spark": "LOCATE(needle, haystack)", - "clickhouse": "position(haystack, needle)", - "snowflake": "POSITION(needle, haystack)", + "redshift": "STRPOS(haystack, needle)", + "trino": "STRPOS(haystack, needle)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle)", + read={ + "bigquery": "INSTR(haystack, needle)", + "databricks": "INSTR(haystack, needle)", + "doris": "INSTR(haystack, needle)", + "duckdb": "INSTR(haystack, needle)", + "hive": "INSTR(haystack, needle)", + "mysql": "INSTR(haystack, needle)", + "oracle": "INSTR(haystack, needle)", + "spark": "INSTR(haystack, needle)", + "spark2": "INSTR(haystack, needle)", + "sqlite": "INSTR(haystack, needle)", + "starrocks": "INSTR(haystack, needle)", + "teradata": "INSTR(haystack, needle)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle)", + read={ + "clickhouse": "LOCATE(needle, haystack)", + "databricks": "LOCATE(needle, haystack)", + "doris": "LOCATE(needle, haystack)", + "hive": "LOCATE(needle, haystack)", "mysql": "LOCATE(needle, haystack)", + "spark": "LOCATE(needle, haystack)", + "spark2": "LOCATE(needle, haystack)", + "starrocks": "LOCATE(needle, haystack)", + "teradata": "LOCATE(needle, haystack)", }, ) self.validate_all( "STR_POSITION(haystack, needle)", + read={ + "athena": "CHARINDEX(needle, haystack)", + "databricks": "CHARINDEX(needle, 
haystack)", + "snowflake": "CHARINDEX(needle, haystack)", + "tsql": "CHARINDEX(needle, haystack)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle)", + read={ + "tableau": "FIND(haystack, needle)", + }, write={ + "athena": "STRPOS(haystack, needle)", + "bigquery": "INSTR(haystack, needle)", + "clickhouse": "POSITION(haystack, needle)", + "databricks": "LOCATE(needle, haystack)", + "doris": "LOCATE(needle, haystack)", "drill": "STRPOS(haystack, needle)", "duckdb": "STRPOS(haystack, needle)", - "postgres": "STRPOS(haystack, needle)", + "hive": "LOCATE(needle, haystack)", + "materialize": "POSITION(needle IN haystack)", + "mysql": "LOCATE(needle, haystack)", + "oracle": "INSTR(haystack, needle)", + "postgres": "POSITION(needle IN haystack)", "presto": "STRPOS(haystack, needle)", - "bigquery": "STRPOS(haystack, needle)", + "redshift": "POSITION(needle IN haystack)", + "risingwave": "POSITION(needle IN haystack)", + "snowflake": "CHARINDEX(needle, haystack)", "spark": "LOCATE(needle, haystack)", - "clickhouse": "position(haystack, needle)", - "snowflake": "POSITION(needle, haystack)", - "mysql": "LOCATE(needle, haystack)", + "spark2": "LOCATE(needle, haystack)", + "sqlite": "INSTR(haystack, needle)", + "tableau": "FIND(haystack, needle)", + "teradata": "INSTR(haystack, needle)", + "trino": "STRPOS(haystack, needle)", + "tsql": "CHARINDEX(needle, haystack)", }, ) self.validate_all( - "POSITION(needle, haystack, pos)", + "STR_POSITION(haystack, needle, position)", + read={ + "clickhouse": "POSITION(haystack, needle, position)", + "databricks": "POSITION(needle, haystack, position)", + "snowflake": "POSITION(needle, haystack, position)", + "spark": "POSITION(needle, haystack, position)", + "spark2": "POSITION(needle, haystack, position)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle, position)", + read={ + "doris": "LOCATE(needle, haystack, position)", + "hive": "LOCATE(needle, haystack, position)", + "mysql": "LOCATE(needle, haystack, position)", + "spark": "LOCATE(needle, haystack, position)", + "spark2": "LOCATE(needle, haystack, position)", + "starrocks": "LOCATE(needle, haystack, position)", + "teradata": "LOCATE(needle, haystack, position)", + "clickhouse": "LOCATE(needle, haystack, position)", + "databricks": "LOCATE(needle, haystack, position)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle, position)", + read={ + "bigquery": "INSTR(haystack, needle, position)", + "doris": "INSTR(haystack, needle, position)", + "oracle": "INSTR(haystack, needle, position)", + "teradata": "INSTR(haystack, needle, position)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle, position)", + read={ + "databricks": "CHARINDEX(needle, haystack, position)", + "snowflake": "CHARINDEX(needle, haystack, position)", + "tsql": "CHARINDEX(needle, haystack, position)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle, position)", + write={ + "athena": "IF(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)", + "bigquery": "INSTR(haystack, needle, position)", + "clickhouse": "POSITION(haystack, needle, position)", + "databricks": "LOCATE(needle, haystack, position)", + "doris": "LOCATE(needle, haystack, position)", + "drill": "`IF`(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)", + "duckdb": "CASE WHEN STRPOS(SUBSTRING(haystack, position), needle) = 0 THEN 0 ELSE STRPOS(SUBSTRING(haystack, position), needle) + 
position - 1 END", + "hive": "LOCATE(needle, haystack, position)", + "materialize": "CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM position)) = 0 THEN 0 ELSE POSITION(needle IN SUBSTRING(haystack FROM position)) + position - 1 END", + "mysql": "LOCATE(needle, haystack, position)", + "oracle": "INSTR(haystack, needle, position)", + "postgres": "CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM position)) = 0 THEN 0 ELSE POSITION(needle IN SUBSTRING(haystack FROM position)) + position - 1 END", + "presto": "IF(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)", + "redshift": "CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM position)) = 0 THEN 0 ELSE POSITION(needle IN SUBSTRING(haystack FROM position)) + position - 1 END", + "risingwave": "CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM position)) = 0 THEN 0 ELSE POSITION(needle IN SUBSTRING(haystack FROM position)) + position - 1 END", + "snowflake": "CHARINDEX(needle, haystack, position)", + "spark": "LOCATE(needle, haystack, position)", + "spark2": "LOCATE(needle, haystack, position)", + "sqlite": "IIF(INSTR(SUBSTRING(haystack, position), needle) = 0, 0, INSTR(SUBSTRING(haystack, position), needle) + position - 1)", + "tableau": "IF FIND(SUBSTRING(haystack, position), needle) = 0 THEN 0 ELSE FIND(SUBSTRING(haystack, position), needle) + position - 1 END", + "teradata": "INSTR(haystack, needle, position)", + "trino": "IF(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)", + "tsql": "CHARINDEX(needle, haystack, position)", + }, + ) + self.validate_all( + "STR_POSITION(haystack, needle, position, occurrence)", + read={ + "bigquery": "INSTR(haystack, needle, position, occurrence)", + "oracle": "INSTR(haystack, needle, position, occurrence)", + "teradata": "INSTR(haystack, needle, position, occurrence)", + }, write={ - "drill": "`IF`(STRPOS(SUBSTR(haystack, pos), needle) = 0, 0, STRPOS(SUBSTR(haystack, pos), needle) + pos - 1)", - "presto": "IF(STRPOS(SUBSTR(haystack, pos), needle) = 0, 0, STRPOS(SUBSTR(haystack, pos), needle) + pos - 1)", - "postgres": "CASE WHEN STRPOS(SUBSTR(haystack, pos), needle) = 0 THEN 0 ELSE STRPOS(SUBSTR(haystack, pos), needle) + pos - 1 END", - "spark": "LOCATE(needle, haystack, pos)", - "clickhouse": "position(haystack, needle, pos)", - "snowflake": "POSITION(needle, haystack, pos)", - "mysql": "LOCATE(needle, haystack, pos)", + "bigquery": "INSTR(haystack, needle, position, occurrence)", + "oracle": "INSTR(haystack, needle, position, occurrence)", + "presto": "IF(STRPOS(SUBSTRING(haystack, position), needle, occurrence) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle, occurrence) + position - 1)", + "tableau": "IF FINDNTH(SUBSTRING(haystack, position), needle, occurrence) = 0 THEN 0 ELSE FINDNTH(SUBSTRING(haystack, position), needle, occurrence) + position - 1 END", + "teradata": "INSTR(haystack, needle, position, occurrence)", + "trino": "IF(STRPOS(SUBSTRING(haystack, position), needle, occurrence) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle, occurrence) + position - 1)", }, ) self.validate_all( diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 69a592d..959a1b3 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -903,6 +903,13 @@ class TestDuckDB(Validator): }, ) + self.validate_all( + "SELECT REGEXP_MATCHES('ThOmAs', 'thomas', 'i')", + read={ + "postgres": "SELECT 'ThOmAs' ~* 'thomas'", + 
diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py
index 69a592d..959a1b3 100644
--- a/tests/dialects/test_duckdb.py
+++ b/tests/dialects/test_duckdb.py
@@ -903,6 +903,13 @@ class TestDuckDB(Validator):
             },
         )

+        self.validate_all(
+            "SELECT REGEXP_MATCHES('ThOmAs', 'thomas', 'i')",
+            read={
+                "postgres": "SELECT 'ThOmAs' ~* 'thomas'",
+            },
+        )
+
     def test_array_index(self):
         with self.assertLogs(helper_logger) as cm:
             self.validate_all(
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index e52ee72..edc208c 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -588,8 +588,8 @@ class TestHive(Validator):
         self.validate_all(
             "LOCATE('a', x, 3)",
             write={
-                "duckdb": "CASE WHEN STRPOS(SUBSTR(x, 3), 'a') = 0 THEN 0 ELSE STRPOS(SUBSTR(x, 3), 'a') + 3 - 1 END",
-                "presto": "IF(STRPOS(SUBSTR(x, 3), 'a') = 0, 0, STRPOS(SUBSTR(x, 3), 'a') + 3 - 1)",
+                "duckdb": "CASE WHEN STRPOS(SUBSTRING(x, 3), 'a') = 0 THEN 0 ELSE STRPOS(SUBSTRING(x, 3), 'a') + 3 - 1 END",
+                "presto": "IF(STRPOS(SUBSTRING(x, 3), 'a') = 0, 0, STRPOS(SUBSTRING(x, 3), 'a') + 3 - 1)",
                 "hive": "LOCATE('a', x, 3)",
                 "spark": "LOCATE('a', x, 3)",
             },
@@ -740,6 +740,7 @@ class TestHive(Validator):
                 "presto": "SET_AGG(x)",
                 "snowflake": "ARRAY_UNIQUE_AGG(x)",
                 "spark": "COLLECT_SET(x)",
+                "trino": "ARRAY_AGG(DISTINCT x)",
             },
         )
         self.validate_all(
diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py
index 0abef50..ac748cd 100644
--- a/tests/dialects/test_oracle.py
+++ b/tests/dialects/test_oracle.py
@@ -394,6 +394,9 @@ JOIN departments
         self.validate_identity(
             "XMLTABLE('x' RETURNING SEQUENCE BY REF COLUMNS a VARCHAR2, b FLOAT)"
         )
+        self.validate_identity(
+            "SELECT x.* FROM example t, XMLTABLE(XMLNAMESPACES(DEFAULT 'http://example.com/default', 'http://example.com/ns1' AS \"ns1\"), '/root/data' PASSING t.xml COLUMNS id NUMBER PATH '@id', value VARCHAR2(100) PATH 'ns1:value/text()') x"
+        )
         self.validate_all(
             """SELECT warehouse_name warehouse,
@@ -513,10 +516,10 @@ FROM JSON_TABLE(res, '$.info[*]' COLUMNS(
     LEVEL,
     SYS_CONNECT_BY_PATH(last_name, '/') AS "Path"
 FROM employees
-START WITH last_name = 'King'
-CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4
 WHERE
-    level <= 3 AND department_id = 80"""
+    level <= 3 AND department_id = 80
+START WITH last_name = 'King'
+CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4"""

         for query in (f"{body}{start}{connect}", f"{body}{connect}{start}"):
             self.validate_identity(query, pretty, pretty=True)
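The test_duckdb.py hunk above maps Postgres' case-insensitive regex operator onto DuckDB. A quick sketch to reproduce it (assumes the merged 26.3.8 behavior):

    import sqlglot

    # Postgres ~* becomes REGEXP_MATCHES with the 'i' (case-insensitive) flag.
    print(sqlglot.transpile("SELECT 'ThOmAs' ~* 'thomas'", read="postgres", write="duckdb")[0])
    # SELECT REGEXP_MATCHES('ThOmAs', 'thomas', 'i')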
diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py
index 6a52ea2..6714de2 100644
--- a/tests/dialects/test_postgres.py
+++ b/tests/dialects/test_postgres.py
@@ -76,7 +76,10 @@ class TestPostgres(Validator):
         self.validate_identity("SELECT CURRENT_USER")
         self.validate_identity("SELECT * FROM ONLY t1")
         self.validate_identity(
-            "SELECT id, name FROM XMLTABLE('/root/user' PASSING xml_data COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()') AS t"
+            "SELECT id, name FROM xml_data AS t, XMLTABLE('/root/user' PASSING t.xml COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()') AS x"
+        )
+        self.validate_identity(
+            "SELECT id, value FROM xml_content AS t, XMLTABLE(XMLNAMESPACES('http://example.com/ns1' AS ns1, 'http://example.com/ns2' AS ns2), '/root/data' PASSING t.xml COLUMNS id INT PATH '@ns1:id', value TEXT PATH 'ns2:value/text()') AS x"
         )
         self.validate_identity(
             "SELECT * FROM t WHERE some_column >= CURRENT_DATE + INTERVAL '1 day 1 hour' AND some_another_column IS TRUE"
@@ -830,6 +833,10 @@ class TestPostgres(Validator):
             "/* + some comment */ SELECT b.foo, b.bar FROM baz AS b",
         )

+        self.validate_identity(
+            "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY a) FILTER(WHERE CAST(b AS BOOLEAN)) AS mean_value FROM (VALUES (0, 't')) AS fake_data(a, b)"
+        )
+
     def test_ddl(self):
         # Checks that user-defined types are parsed into DataType instead of Identifier
         self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(
diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py
index fa5c8c1..c84f000 100644
--- a/tests/dialects/test_presto.py
+++ b/tests/dialects/test_presto.py
@@ -204,14 +204,14 @@ class TestPresto(Validator):
             },
         )
         self.validate_all(
-            "STRPOS('ABC', 'A', 3)",
-            read={
-                "trino": "STRPOS('ABC', 'A', 3)",
-            },
-            write={
-                "presto": "STRPOS('ABC', 'A', 3)",
-                "trino": "STRPOS('ABC', 'A', 3)",
-                "snowflake": "POSITION('A', 'ABC')",
+            "STRPOS(haystack, needle, occurrence)",
+            write={
+                "bigquery": "INSTR(haystack, needle, 1, occurrence)",
+                "oracle": "INSTR(haystack, needle, 1, occurrence)",
+                "presto": "STRPOS(haystack, needle, occurrence)",
+                "tableau": "FINDNTH(haystack, needle, occurrence)",
+                "trino": "STRPOS(haystack, needle, occurrence)",
+                "teradata": "INSTR(haystack, needle, 1, occurrence)",
             },
         )
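The test_presto.py hunk above gives STRPOS' third (occurrence) argument a real translation instead of a lossy one. A sketch (assumes the merged 26.3.8 behavior):

    import sqlglot

    # Presto's occurrence argument maps to four-argument INSTR,
    # with the start position pinned at 1.
    print(sqlglot.transpile("STRPOS(haystack, needle, occurrence)", read="presto", write="oracle")[0])
    # INSTR(haystack, needle, 1, occurrence)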
diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py
index 5c0cc01..cb38af7 100644
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -36,7 +36,6 @@ class TestSnowflake(Validator):
         self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR) RENAME FIELDS)")
         self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR, y VARCHAR) ADD FIELDS)")
         self.validate_identity("SELECT TO_TIMESTAMP(123.4)").selects[0].assert_is(exp.Anonymous)
-        self.validate_identity("SELECT TO_TIME(x) FROM t")
         self.validate_identity("SELECT TO_TIMESTAMP(x) FROM t")
         self.validate_identity("SELECT TO_TIMESTAMP_NTZ(x) FROM t")
         self.validate_identity("SELECT TO_TIMESTAMP_LTZ(x) FROM t")
@@ -106,6 +105,9 @@ class TestSnowflake(Validator):
             "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
         )
         self.validate_identity(
+            """SELECT TO_TIMESTAMP('2025-01-16T14:45:30.123+0500', 'yyyy-mm-DD"T"hh24:mi:ss.ff3TZHTZM')"""
+        )
+        self.validate_identity(
             "WITH t (SELECT 1 AS c) SELECT c FROM t",
             "WITH t AS (SELECT 1 AS c) SELECT c FROM t",
         )
@@ -295,6 +297,13 @@ class TestSnowflake(Validator):
         )

         self.validate_all(
+            "SELECT TO_TIMESTAMP('2025-01-16 14:45:30.123', 'yyyy-mm-DD hh24:mi:ss.ff6')",
+            write={
+                "": "SELECT STR_TO_TIME('2025-01-16 14:45:30.123', '%Y-%m-%d %H:%M:%S.%f')",
+                "snowflake": "SELECT TO_TIMESTAMP('2025-01-16 14:45:30.123', 'yyyy-mm-DD hh24:mi:ss.ff6')",
+            },
+        )
+        self.validate_all(
             "ARRAY_CONSTRUCT_COMPACT(1, null, 2)",
             write={
                 "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))",
@@ -721,13 +730,6 @@ class TestSnowflake(Validator):
             },
         )
         self.validate_all(
-            "SELECT TO_TIME('12:05:00')",
-            write={
-                "bigquery": "SELECT CAST('12:05:00' AS TIME)",
-                "snowflake": "SELECT CAST('12:05:00' AS TIME)",
-            },
-        )
-        self.validate_all(
             "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/DD/yyyy hh24:mi:ss')",
             read={
                 "bigquery": "SELECT PARSE_TIMESTAMP('%m/%d/%Y %H:%M:%S', '04/05/2013 01:02:03')",
@@ -1286,6 +1288,37 @@ class TestSnowflake(Validator):
             },
         )

+        self.validate_identity("SELECT TO_TIME(x) FROM t")
+        self.validate_all(
+            "SELECT TO_TIME('12:05:00')",
+            write={
+                "bigquery": "SELECT CAST('12:05:00' AS TIME)",
+                "snowflake": "SELECT CAST('12:05:00' AS TIME)",
+                "duckdb": "SELECT CAST('12:05:00' AS TIME)",
+            },
+        )
+        self.validate_all(
+            "SELECT TO_TIME(CONVERT_TIMEZONE('UTC', 'US/Pacific', '2024-08-06 09:10:00.000')) AS pst_time",
+            write={
+                "snowflake": "SELECT TO_TIME(CONVERT_TIMEZONE('UTC', 'US/Pacific', '2024-08-06 09:10:00.000')) AS pst_time",
+                "duckdb": "SELECT CAST(CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'US/Pacific' AS TIME) AS pst_time",
+            },
+        )
+        self.validate_all(
+            "SELECT TO_TIME('11.15.00', 'hh24.mi.ss')",
+            write={
+                "snowflake": "SELECT TO_TIME('11.15.00', 'hh24.mi.ss')",
+                "duckdb": "SELECT CAST(STRPTIME('11.15.00', '%H.%M.%S') AS TIME)",
+            },
+        )
+        self.validate_all(
+            "SELECT TRY_TO_TIME('11.15.00', 'hh24.mi.ss')",
+            write={
+                "snowflake": "SELECT TRY_TO_TIME('11.15.00', 'hh24.mi.ss')",
+                "duckdb": "SELECT CAST(STRPTIME('11.15.00', '%H.%M.%S') AS TIME)",
+            },
+        )
+
     def test_semi_structured_types(self):
         self.validate_identity("SELECT CAST(a AS VARIANT)")
         self.validate_identity("SELECT CAST(a AS ARRAY)")
diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py
index 1a4f5c3..066828a 100644
--- a/tests/dialects/test_sqlite.py
+++ b/tests/dialects/test_sqlite.py
@@ -7,6 +7,7 @@ class TestSQLite(Validator):
     dialect = "sqlite"

     def test_sqlite(self):
+        self.validate_identity("UNHEX(a, b)")
         self.validate_identity("SELECT DATE()")
         self.validate_identity("SELECT DATE('now', 'start of month', '+1 month', '-1 day')")
         self.validate_identity("SELECT DATETIME(1092941466, 'unixepoch')")
diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py
index 3b22f0d..44ff1ea 100644
--- a/tests/dialects/test_trino.py
+++ b/tests/dialects/test_trino.py
@@ -9,6 +9,13 @@ class TestTrino(Validator):
         self.validate_identity("JSON_QUERY(content, 'lax $.HY.*')")
         self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITH UNCONDITIONAL WRAPPER)")
         self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITHOUT CONDITIONAL WRAPPER)")
+        self.validate_identity("JSON_QUERY(description, 'strict $.comment' KEEP QUOTES)")
+        self.validate_identity(
+            "JSON_QUERY(description, 'strict $.comment' OMIT QUOTES ON SCALAR STRING)"
+        )
+        self.validate_identity(
+            "JSON_QUERY(content, 'strict $.HY.*' WITH UNCONDITIONAL WRAPPER KEEP QUOTES)"
+        )

     def test_listagg(self):
         self.validate_identity(
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index e98ac10..dc35610 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -184,7 +184,7 @@ class TestTSQL(Validator):
                 "tsql": "CREATE TABLE #mytemptable (a INTEGER)",
                 "snowflake": "CREATE TEMPORARY TABLE mytemptable (a INT)",
                 "duckdb": "CREATE TEMPORARY TABLE mytemptable (a INT)",
-                "oracle": "CREATE GLOBAL TEMPORARY TABLE mytemptable (a NUMBER)",
+                "oracle": "CREATE GLOBAL TEMPORARY TABLE mytemptable (a INT)",
                 "hive": "CREATE TEMPORARY TABLE mytemptable (a INT)",
                 "spark2": "CREATE TEMPORARY TABLE mytemptable (a INT) USING PARQUET",
                 "spark": "CREATE TEMPORARY TABLE mytemptable (a INT) USING PARQUET",
@@ -436,6 +436,13 @@ class TestTSQL(Validator):
             "'a' + 'b'",
         )

+        self.validate_identity(
+            "CREATE TABLE db.t1 (a INTEGER, b VARCHAR(50), CONSTRAINT c PRIMARY KEY (a DESC))",
+        )
+        self.validate_identity(
+            "CREATE TABLE db.t1 (a INTEGER, b INTEGER, CONSTRAINT c PRIMARY KEY (a DESC, b))"
+        )
+
     def test_option(self):
         possible_options = [
             "HASH GROUP",
@@ -836,6 +843,7 @@ class TestTSQL(Validator):
             f"UNIQUE {clustered_keyword} ([internal_id] ASC))",
         )

+        self.validate_identity("CREATE SCHEMA testSchema")
         self.validate_identity("CREATE VIEW t AS WITH cte AS (SELECT 1 AS c) SELECT c FROM cte")
         self.validate_identity(
             "ALTER TABLE tbl SET SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS)"
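The test_snowflake.py hunk above regroups the TO_TIME/TRY_TO_TIME tests and adds format-string support. A sketch of the new format translation (assumes the merged 26.3.8 behavior):

    import sqlglot

    # Snowflake's hh24.mi.ss format string becomes a strptime-style
    # format when targeting DuckDB.
    print(sqlglot.transpile("SELECT TO_TIME('11.15.00', 'hh24.mi.ss')", read="snowflake", write="duckdb")[0])
    # SELECT CAST(STRPTIME('11.15.00', '%H.%M.%S') AS TIME)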
diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql
index 2be8838..b9a6fc1 100644
--- a/tests/fixtures/optimizer/qualify_columns.sql
+++ b/tests/fixtures/optimizer/qualify_columns.sql
@@ -227,7 +227,32 @@ START WITH title = 'President'
 CONNECT BY manager_ID = PRIOR employee_id
 ORDER BY employee_ID NULLS LAST;
-WITH EMPLOYEES AS (SELECT T.TITLE AS TITLE, T.EMPLOYEE_ID AS EMPLOYEE_ID, T.MANAGER_ID AS MANAGER_ID FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20)) AS T(TITLE, EMPLOYEE_ID, MANAGER_ID)) SELECT EMPLOYEES.EMPLOYEE_ID AS EMPLOYEE_ID, EMPLOYEES.MANAGER_ID AS MANAGER_ID, EMPLOYEES.TITLE AS TITLE, EMPLOYEES.LEVEL AS LEVEL FROM EMPLOYEES AS EMPLOYEES START WITH EMPLOYEES.TITLE = 'President' CONNECT BY EMPLOYEES.MANAGER_ID = PRIOR EMPLOYEES.EMPLOYEE_ID ORDER BY EMPLOYEE_ID;
+WITH EMPLOYEES AS (SELECT T.TITLE AS TITLE, T.EMPLOYEE_ID AS EMPLOYEE_ID, T.MANAGER_ID AS MANAGER_ID FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20)) AS T(TITLE, EMPLOYEE_ID, MANAGER_ID)) SELECT EMPLOYEES.EMPLOYEE_ID AS EMPLOYEE_ID, EMPLOYEES.MANAGER_ID AS MANAGER_ID, EMPLOYEES.TITLE AS TITLE, LEVEL AS LEVEL FROM EMPLOYEES AS EMPLOYEES START WITH EMPLOYEES.TITLE = 'President' CONNECT BY EMPLOYEES.MANAGER_ID = PRIOR EMPLOYEES.EMPLOYEE_ID ORDER BY EMPLOYEE_ID;
+
+# execute: false
+# dialect: oracle
+WITH
+t1 AS (
+  SELECT
+    1 AS c1,
+    1 AS c2,
+    'Y' AS TOP_PARENT_INDICATOR,
+    1 AS id
+  FROM DUAL
+),
+t2 AS (
+  SELECT
+    1 AS c2,
+    2 AS id
+  FROM DUAL
+)
+SELECT t1.c1
+FROM t1
+LEFT JOIN t2 ON t1.c2 = t2.c2
+WHERE (t1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1)
+START WITH (t1.id IS NOT NULL)
+CONNECT BY PRIOR t1.id = t2.id;
+WITH T1 AS (SELECT 1 AS C1, 1 AS C2, 'Y' AS TOP_PARENT_INDICATOR, 1 AS ID FROM DUAL DUAL), T2 AS (SELECT 1 AS C2, 2 AS ID FROM DUAL DUAL) SELECT T1.C1 AS C1 FROM T1 T1 LEFT JOIN T2 T2 ON T1.C2 = T2.C2 WHERE (T1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1) START WITH (NOT T1.ID IS NULL) CONNECT BY PRIOR T1.ID = T2.ID;

 --------------------------------------
 -- Derived tables
@@ -785,4 +810,25 @@ SELECT X.A AS FOO FROM X AS X GROUP BY X.A = 1;
 # dialect: snowflake
 # execute: false
 SELECT x.a AS foo FROM x WHERE foo = 1;
-SELECT X.A AS FOO FROM X AS X WHERE X.A = 1;
\ No newline at end of file
+SELECT X.A AS FOO FROM X AS X WHERE X.A = 1;
+
+
+--------------------------------------
+-- SEMI / ANTI Joins
+--------------------------------------
+
+# title: SEMI JOIN table is excluded from the scope
+SELECT * FROM x SEMI JOIN y USING (b);
+SELECT x.a AS a, x.b AS b FROM x AS x SEMI JOIN y AS y ON x.b = y.b;
+
+# title: ANTI JOIN table is excluded from the scope
+SELECT * FROM x ANTI JOIN y USING (b);
+SELECT x.a AS a, x.b AS b FROM x AS x ANTI JOIN y AS y ON x.b = y.b;
+
+# title: SEMI + normal joins reinclude the table on scope
+SELECT * FROM x SEMI JOIN y USING (b) JOIN y USING (b);
+SELECT x.a AS a, COALESCE(x.b, y_2.b) AS b, y_2.c AS c FROM x AS x SEMI JOIN y AS y ON x.b = y.b JOIN y AS y_2 ON x.b = y_2.b;
+
+# title: ANTI + normal joins reinclude the table on scope
+SELECT * FROM x ANTI JOIN y USING (b) JOIN y USING (b);
+SELECT x.a AS a, COALESCE(x.b, y_2.b) AS b, y_2.c AS c FROM x AS x ANTI JOIN y AS y ON x.b = y.b JOIN y AS y_2 ON x.b = y_2.b;
diff --git a/tests/fixtures/optimizer/qualify_columns__invalid.sql b/tests/fixtures/optimizer/qualify_columns__invalid.sql
index 09a9ddc..a8e646c 100644
--- a/tests/fixtures/optimizer/qualify_columns__invalid.sql
+++ b/tests/fixtures/optimizer/qualify_columns__invalid.sql
@@ -12,3 +12,4 @@ SELECT a, SUM(b) FROM x GROUP BY 3;
 SELECT p FROM (SELECT x from xx) y CROSS JOIN yy CROSS JOIN zz
 SELECT a FROM (SELECT * FROM x CROSS JOIN y);
 SELECT x FROM tbl AS tbl(a);
+SELECT a JOIN b USING (a);
diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql
index 1842e55..0597c7b 100644
--- a/tests/fixtures/optimizer/simplify.sql
+++ b/tests/fixtures/optimizer/simplify.sql
@@ -363,6 +363,9 @@ x * (1 - y);
 ANY(t.value);
 ANY(t.value);

+SELECT (ARRAY_AGG(foo))[1];
+SELECT (ARRAY_AGG(foo))[1];
+
 --------------------------------------
 -- Literals
 --------------------------------------
diff --git a/tests/test_transpile.py b/tests/test_transpile.py
index 5e50c3c..bb8d032 100644
--- a/tests/test_transpile.py
+++ b/tests/test_transpile.py
@@ -668,7 +668,7 @@ FROM tbl1""",
         )
         self.validate(
             "WITH A(filter) AS (VALUES 1, 2, 3) SELECT * FROM A WHERE filter >= 2",
-            "WITH A(filter) AS (VALUES (1), (2), (3)) SELECT * FROM A WHERE filter >= 2",
+            "WITH A(filter) AS (SELECT * FROM (VALUES (1), (2), (3)) AS _values) SELECT * FROM A WHERE filter >= 2",
             read="presto",
         )
         self.validate(
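The test_transpile.py hunk above wraps a bare VALUES in a projection when reading Presto, since a CTE with a column alias list needs a SELECT around VALUES there. A sketch (assumes the merged 26.3.8 behavior; _values is the alias shown in the test expectation):

    import sqlglot

    # Presto's bare VALUES in a CTE gets wrapped in SELECT * FROM (...).
    sql = "WITH A(filter) AS (VALUES 1, 2, 3) SELECT * FROM A WHERE filter >= 2"
    print(sqlglot.transpile(sql, read="presto")[0])
    # WITH A(filter) AS (SELECT * FROM (VALUES (1), (2), (3)) AS _values) SELECT * FROM A WHERE filter >= 2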