Diffstat (limited to 'tests/dialects')
-rw-r--r--  tests/dialects/test_athena.py     | 211
-rw-r--r--  tests/dialects/test_bigquery.py   | 349
-rw-r--r--  tests/dialects/test_clickhouse.py | 348
-rw-r--r--  tests/dialects/test_databricks.py |  79
-rw-r--r--  tests/dialects/test_dialect.py    | 408
-rw-r--r--  tests/dialects/test_doris.py      |  28
-rw-r--r--  tests/dialects/test_duckdb.py     | 235
-rw-r--r--  tests/dialects/test_hive.py       |  35
-rw-r--r--  tests/dialects/test_mysql.py      | 125
-rw-r--r--  tests/dialects/test_oracle.py     | 227
-rw-r--r--  tests/dialects/test_postgres.py   | 179
-rw-r--r--  tests/dialects/test_presto.py     | 126
-rw-r--r--  tests/dialects/test_prql.py       | 186
-rw-r--r--  tests/dialects/test_redshift.py   |  71
-rw-r--r--  tests/dialects/test_snowflake.py  | 245
-rw-r--r--  tests/dialects/test_spark.py      |  81
-rw-r--r--  tests/dialects/test_sqlite.py     |   4
-rw-r--r--  tests/dialects/test_starrocks.py  |  87
-rw-r--r--  tests/dialects/test_teradata.py   |  23
-rw-r--r--  tests/dialects/test_trino.py      |  32
-rw-r--r--  tests/dialects/test_tsql.py       | 167
21 files changed, 2803 insertions(+), 443 deletions(-)
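
The tests below rely on two helpers defined in tests/dialects/test_dialect.py: validate_identity round-trips a statement through a single dialect (optionally comparing against an expected write_sql), and validate_all checks read/write mappings across several dialects. A minimal sketch of the round-trip check, expressed through sqlglot's public API rather than the actual helper implementation (the helper name and defaults here are illustrative):

import sqlglot

def roundtrip(sql, write_sql=None, dialect="athena", **opts):
    # Parse with the dialect and re-generate; the output must equal the input,
    # or the expected write_sql when a rewrite is intended.
    out = sqlglot.transpile(sql, read=dialect, write=dialect, **opts)[0]
    assert out == (write_sql or sql), out

roundtrip("DROP TABLE foo", write_sql="DROP TABLE `foo`", identify=True)
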
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py
index 3288ada..ca91d4a 100644
--- a/tests/dialects/test_athena.py
+++ b/tests/dialects/test_athena.py
@@ -1,3 +1,4 @@
+from sqlglot import exp
from tests.dialects.test_dialect import Validator
@@ -7,6 +8,15 @@ class TestAthena(Validator):
def test_athena(self):
self.validate_identity(
+ "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
+ )
+ self.validate_identity(
+ "UNLOAD (SELECT name1, address1, comment1, key1 FROM table1) "
+ "TO 's3://amzn-s3-demo-bucket/ partitioned/' "
+ "WITH (format = 'TEXTFILE', partitioned_by = ARRAY['key1'])",
+ check_command_warning=True,
+ )
+ self.validate_identity(
"""USING EXTERNAL FUNCTION some_function(input VARBINARY)
RETURNS VARCHAR
LAMBDA 'some-name'
@@ -16,5 +26,204 @@ class TestAthena(Validator):
)
self.validate_identity(
- "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
+ "/* leading comment */CREATE SCHEMA foo",
+ write_sql="/* leading comment */ CREATE SCHEMA `foo`",
+ identify=True,
+ )
+ self.validate_identity(
+ "/* leading comment */SELECT * FROM foo",
+ write_sql='/* leading comment */ SELECT * FROM "foo"',
+ identify=True,
+ )
+
+ def test_ddl(self):
+ # Hive-like, https://docs.aws.amazon.com/athena/latest/ug/create-table.html
+ self.validate_identity("CREATE EXTERNAL TABLE foo (id INT) COMMENT 'test comment'")
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE foo (id INT, val STRING) CLUSTERED BY (id, val) INTO 10 BUCKETS"
+ )
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE foo (id INT, val STRING) STORED AS PARQUET LOCATION 's3://foo' TBLPROPERTIES ('has_encryped_data'='true', 'classification'='test')"
+ )
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE IF NOT EXISTS foo (a INT, b STRING) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' WITH SERDEPROPERTIES ('case.insensitive'='FALSE') LOCATION 's3://table/path'"
+ )
+ self.validate_identity(
+ """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
+ )
+ self.validate_identity(
+ """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
+ )
+
+ # Iceberg, https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html
+ self.validate_identity(
+ "CREATE TABLE iceberg_table (`id` BIGINT, `data` STRING, category STRING) PARTITIONED BY (category, BUCKET(16, id)) LOCATION 's3://amzn-s3-demo-bucket/your-folder/' TBLPROPERTIES ('table_type'='ICEBERG', 'write_compression'='snappy')"
+ )
+
+    # CTAS goes to the Trino engine, where the table properties can't be enclosed in single quotes like they can for Hive
+ # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
+ self.validate_identity(
+ "CREATE TABLE foo WITH (table_type='ICEBERG', external_location='s3://foo/') AS SELECT * FROM a"
+ )
+ self.validate_identity(
+ "CREATE TABLE foo AS WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo"
+ )
+
+    # ALTER TABLE ADD COLUMN is not supported; it needs to be generated as ALTER TABLE ADD COLUMNS
+ self.validate_identity(
+ "ALTER TABLE `foo`.`bar` ADD COLUMN `end_ts` BIGINT",
+ write_sql="ALTER TABLE `foo`.`bar` ADD COLUMNS (`end_ts` BIGINT)",
+ )
+
+ def test_dml(self):
+ self.validate_all(
+ "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ read={"": "SELECT CAST(ds AS STRING) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)"},
+ write={
+ "hive": "SELECT CAST(ds AS STRING) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ "trino": "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ "athena": "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ },
+ )
+
+ def test_ddl_quoting(self):
+ self.validate_identity("CREATE SCHEMA `foo`")
+ self.validate_identity("CREATE SCHEMA foo")
+
+ self.validate_identity("CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'")
+ self.validate_identity("CREATE EXTERNAL TABLE foo (id INT) LOCATION 's3://foo/'")
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE foo (id INT) LOCATION 's3://foo/'",
+ write_sql="CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'",
+ identify=True,
+ )
+
+ self.validate_identity("CREATE TABLE foo AS SELECT * FROM a")
+ self.validate_identity('CREATE TABLE "foo" AS SELECT * FROM "a"')
+ self.validate_identity(
+ "CREATE TABLE `foo` AS SELECT * FROM `a`",
+ write_sql='CREATE TABLE "foo" AS SELECT * FROM "a"',
+ identify=True,
+ )
+
+ self.validate_identity("DROP TABLE `foo`")
+ self.validate_identity("DROP TABLE foo")
+ self.validate_identity("DROP TABLE foo", write_sql="DROP TABLE `foo`", identify=True)
+
+ self.validate_identity('CREATE VIEW "foo" AS SELECT "id" FROM "tbl"')
+ self.validate_identity("CREATE VIEW foo AS SELECT id FROM tbl")
+ self.validate_identity(
+ "CREATE VIEW foo AS SELECT id FROM tbl",
+ write_sql='CREATE VIEW "foo" AS SELECT "id" FROM "tbl"',
+ identify=True,
+ )
+
+ # As a side effect of being able to parse both quote types, we can also fix the quoting on incorrectly quoted source queries
+ self.validate_identity('CREATE SCHEMA "foo"', write_sql="CREATE SCHEMA `foo`")
+ self.validate_identity(
+ 'CREATE EXTERNAL TABLE "foo" ("id" INT) LOCATION \'s3://foo/\'',
+ write_sql="CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'",
+ )
+ self.validate_identity('DROP TABLE "foo"', write_sql="DROP TABLE `foo`")
+ self.validate_identity(
+ 'CREATE VIEW `foo` AS SELECT "id" FROM `tbl`',
+ write_sql='CREATE VIEW "foo" AS SELECT "id" FROM "tbl"',
+ )
+ self.validate_identity(
+ "DROP VIEW IF EXISTS `foo`.`bar`",
+ write_sql='DROP VIEW IF EXISTS "foo"."bar"',
+ )
+
+ self.validate_identity(
+ 'ALTER TABLE "foo" ADD COLUMNS ("id" STRING)',
+ write_sql="ALTER TABLE `foo` ADD COLUMNS (`id` STRING)",
+ )
+ self.validate_identity(
+ 'ALTER TABLE "foo" DROP COLUMN "id"', write_sql="ALTER TABLE `foo` DROP COLUMN `id`"
+ )
+
+ self.validate_identity(
+ 'CREATE TABLE "foo" AS WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"'
+ )
+ self.validate_identity(
+ 'CREATE TABLE `foo` AS WITH `foo` AS (SELECT "a", `b` FROM "bar") SELECT * FROM "foo"',
+ write_sql='CREATE TABLE "foo" AS WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"',
+ )
+
+ self.validate_identity("DESCRIBE foo.bar", write_sql="DESCRIBE `foo`.`bar`", identify=True)
+
+ def test_dml_quoting(self):
+ self.validate_identity("SELECT a AS foo FROM tbl")
+ self.validate_identity('SELECT "a" AS "foo" FROM "tbl"')
+ self.validate_identity(
+ 'SELECT `a` AS `foo` FROM "tbl"',
+ write_sql='SELECT "a" AS "foo" FROM "tbl"',
+ identify=True,
+ )
+
+ self.validate_identity("INSERT INTO foo (id) VALUES (1)")
+ self.validate_identity('INSERT INTO "foo" ("id") VALUES (1)')
+ self.validate_identity(
+ 'INSERT INTO `foo` ("id") VALUES (1)',
+ write_sql='INSERT INTO "foo" ("id") VALUES (1)',
+ identify=True,
+ )
+
+ self.validate_identity("UPDATE foo SET id = 3 WHERE id = 7")
+ self.validate_identity('UPDATE "foo" SET "id" = 3 WHERE "id" = 7')
+ self.validate_identity(
+ 'UPDATE `foo` SET "id" = 3 WHERE `id` = 7',
+ write_sql='UPDATE "foo" SET "id" = 3 WHERE "id" = 7',
+ identify=True,
+ )
+
+ self.validate_identity("DELETE FROM foo WHERE id > 10")
+ self.validate_identity('DELETE FROM "foo" WHERE "id" > 10')
+ self.validate_identity(
+ "DELETE FROM `foo` WHERE `id` > 10",
+ write_sql='DELETE FROM "foo" WHERE "id" > 10',
+ identify=True,
+ )
+
+ self.validate_identity("WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo")
+ self.validate_identity(
+ "WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo",
+ write_sql='WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"',
+ identify=True,
+ )
+
+ def test_ctas(self):
+        # Hive tables use 'external_location' to specify the table location, while Iceberg tables use 'location'
+ # The 'table_type' property is used to determine if it's a Hive or an Iceberg table
+ # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
+ ctas_hive = exp.Create(
+ this=exp.to_table("foo.bar"),
+ kind="TABLE",
+ properties=exp.Properties(
+ expressions=[
+ exp.FileFormatProperty(this=exp.Literal.string("parquet")),
+ exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+ ]
+ ),
+ expression=exp.select("1"),
+ )
+ self.assertEqual(
+ ctas_hive.sql(dialect=self.dialect, identify=True),
+ "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo') AS SELECT 1",
+ )
+
+ ctas_iceberg = exp.Create(
+ this=exp.to_table("foo.bar"),
+ kind="TABLE",
+ properties=exp.Properties(
+ expressions=[
+ exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
+ exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+ ]
+ ),
+ expression=exp.select("1"),
+ )
+ self.assertEqual(
+ ctas_iceberg.sql(dialect=self.dialect, identify=True),
+ "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo') AS SELECT 1",
)
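
As the quoting tests above encode, Athena effectively has two engines: DDL goes through a Hive-style path that quotes identifiers with backticks, while DML and CTAS go through a Trino-style path that uses double quotes. A minimal sketch of the same behaviour via sqlglot.transpile (assuming sqlglot is installed; expected outputs are taken from the test expectations above):

import sqlglot

# DDL is generated with Hive-style backticks.
print(sqlglot.transpile('CREATE SCHEMA "foo"', read="athena", write="athena")[0])
# CREATE SCHEMA `foo`

# DML is generated with Trino-style double quotes.
print(sqlglot.transpile("DELETE FROM `foo` WHERE `id` > 10", read="athena", write="athena", identify=True)[0])
# DELETE FROM "foo" WHERE "id" > 10
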
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index ae8ed16..e2adfea 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -1,4 +1,6 @@
from unittest import mock
+import datetime
+import pytz
from sqlglot import (
ErrorLevel,
@@ -20,6 +22,8 @@ class TestBigQuery(Validator):
maxDiff = None
def test_bigquery(self):
+ self.validate_identity("REGEXP_EXTRACT(x, '(?<)')")
+
self.validate_all(
"EXTRACT(HOUR FROM DATETIME(2008, 12, 25, 15, 30, 00))",
write={
@@ -103,8 +107,9 @@ LANGUAGE js AS
select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`")
self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF")
+ self.validate_identity("SELECT * FROM READ_CSV('bla.csv')")
+ self.validate_identity("CAST(x AS STRUCT<list ARRAY<INT64>>)")
self.validate_identity("assert.true(1 = 1)")
- self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text")
self.validate_identity("SELECT jsondoc['some_key']")
self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`")
self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`")
@@ -137,7 +142,6 @@ LANGUAGE js AS
self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day")
self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')")
self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')")
- self.validate_identity("CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)")
self.validate_identity("STRING_AGG(a)")
self.validate_identity("STRING_AGG(a, ' & ')")
self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
@@ -161,12 +165,9 @@ LANGUAGE js AS
self.validate_identity("x <> ''")
self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
self.validate_identity("SELECT b'abc'")
- self.validate_identity("""SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])""")
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
- self.validate_identity("SELECT STRUCT<ARRAY<STRING>>(['2023-01-17'])")
- self.validate_identity("SELECT STRUCT<STRING>((SELECT a FROM b.c LIMIT 1)).*")
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
self.validate_identity("""CREATE TABLE x (a STRUCT<values ARRAY<INT64>>)""")
self.validate_identity("""CREATE TABLE x (a STRUCT<b STRING OPTIONS (description='b')>)""")
@@ -193,6 +194,9 @@ LANGUAGE js AS
self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)")
self.validate_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)")
self.validate_identity(
+ "MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE"
+ )
+ self.validate_identity(
"SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`"
)
self.validate_identity(
@@ -292,8 +296,26 @@ LANGUAGE js AS
r"REGEXP_EXTRACT(svc_plugin_output, r'\\\((.*)')",
r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')",
)
+ self.validate_identity(
+ "SELECT CAST(1 AS BYTEINT)",
+ "SELECT CAST(1 AS INT64)",
+ )
self.validate_all(
+ "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')",
+ write={
+ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')",
+ "duckdb": "CAST(TRY_STRPTIME(some_date, '%d %B %Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date",
+ write={
+ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date",
+ "duckdb": "CAST(TRY_STRPTIME(some_date, '%Y-%m-%d') AS DATE) AS some_date",
+ },
+ )
+ self.validate_all(
"SELECT t.c1, h.c2, s.c3 FROM t1 AS t, UNNEST(t.t2) AS h, UNNEST(h.t3) AS s",
write={
"bigquery": "SELECT t.c1, h.c2, s.c3 FROM t1 AS t, UNNEST(t.t2) AS h, UNNEST(h.t3) AS s",
@@ -352,7 +374,7 @@ LANGUAGE js AS
},
write={
"bigquery": "SELECT SUM(x IGNORE NULLS) AS x",
- "duckdb": "SELECT SUM(x IGNORE NULLS) AS x",
+ "duckdb": "SELECT SUM(x) AS x",
"postgres": "SELECT SUM(x) IGNORE NULLS AS x",
"spark": "SELECT SUM(x) IGNORE NULLS AS x",
"snowflake": "SELECT SUM(x) IGNORE NULLS AS x",
@@ -387,7 +409,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
- "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x",
},
)
@@ -395,7 +417,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
- "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x",
},
)
@@ -431,7 +453,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
- "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
+ "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))",
"mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -496,6 +518,20 @@ LANGUAGE js AS
},
)
self.validate_all(
+ "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')",
+ write={
+ "bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))",
+ "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')",
+ },
+ )
+ self.validate_all(
+ "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
+ write={
+ "bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
+ "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')",
+ },
+ )
+ self.validate_all(
"SELECT COUNTIF(x)",
read={
"clickhouse": "SELECT countIf(x)",
@@ -599,8 +635,9 @@ LANGUAGE js AS
self.validate_all(
"SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
- "bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
- "databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1, '2023-01-01T00:00:00')",
+ "bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
+ "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
+ "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) + INTERVAL '1' MILLISECOND",
},
),
)
@@ -608,8 +645,9 @@ LANGUAGE js AS
self.validate_all(
"SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
- "bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
- "databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1 * -1, '2023-01-01T00:00:00')",
+ "bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
+ "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1' * -1, '2023-01-01T00:00:00')",
+ "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) - INTERVAL '1' MILLISECOND",
},
),
)
@@ -619,6 +657,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
"databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')",
+ "duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))",
},
),
)
@@ -626,17 +665,24 @@ LANGUAGE js AS
self.validate_all(
'SELECT TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
- "bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
- "databricks": "SELECT DATE_ADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
- "mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
- "spark": "SELECT DATE_ADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
+ "bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)",
+ "databricks": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
+ "mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
+ "spark": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
},
)
self.validate_all(
'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
- "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
- "mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
+ "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)",
+ "mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
+ },
+ )
+ self.validate_all(
+ "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
+ write={
+ "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL '2' HOUR)",
+ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
},
)
self.validate_all(
@@ -779,6 +825,7 @@ LANGUAGE js AS
"presto": "SHA256(x)",
"redshift": "SHA2(x, 256)",
"trino": "SHA256(x)",
+ "duckdb": "SHA256(x)",
},
)
self.validate_all(
@@ -1005,7 +1052,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS _t0(x)",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS _t0(x)",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS _t0(x)",
},
)
self.validate_all(
@@ -1192,29 +1239,28 @@ LANGUAGE js AS
"SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
write={
"bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
- "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))",
- "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
- "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
+ "hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
+ "spark": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
},
)
self.validate_all(
"DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)",
write={
"postgres": "CURRENT_DATE - INTERVAL '1 DAY'",
- "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)",
+ "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL '1' DAY)",
},
)
self.validate_all(
- "DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)",
+ "DATE_ADD(CURRENT_DATE(), INTERVAL -1 DAY)",
write={
- "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
- "duckdb": "CURRENT_DATE + INTERVAL 1 DAY",
- "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
- "postgres": "CURRENT_DATE + INTERVAL '1 DAY'",
- "presto": "DATE_ADD('DAY', 1, CURRENT_DATE)",
- "hive": "DATE_ADD(CURRENT_DATE, 1)",
- "spark": "DATE_ADD(CURRENT_DATE, 1)",
+ "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)",
+ "duckdb": "CURRENT_DATE + INTERVAL '-1' DAY",
+ "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)",
+ "postgres": "CURRENT_DATE + INTERVAL '-1 DAY'",
+ "presto": "DATE_ADD('DAY', CAST('-1' AS BIGINT), CURRENT_DATE)",
+ "hive": "DATE_ADD(CURRENT_DATE, '-1')",
+ "spark": "DATE_ADD(CURRENT_DATE, '-1')",
},
)
self.validate_all(
@@ -1240,6 +1286,13 @@ LANGUAGE js AS
},
)
self.validate_all(
+ "DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
+ write={
+ "bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
+ "duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))",
+ },
+ )
+ self.validate_all(
"CURRENT_DATE('UTC')",
write={
"mysql": "CURRENT_DATE AT TIME ZONE 'UTC'",
@@ -1345,6 +1398,113 @@ WHERE
"bigquery": "SELECT CAST(x AS DATETIME)",
},
)
+ self.validate_all(
+ "SELECT TIME(foo, 'America/Los_Angeles')",
+ write={
+ "duckdb": "SELECT CAST(CAST(foo AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIME)",
+ "bigquery": "SELECT TIME(foo, 'America/Los_Angeles')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01')",
+ write={
+ "duckdb": "SELECT CAST('2020-01-01' AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01', TIME '23:59:59')",
+ write={
+ "duckdb": "SELECT CAST(CAST('2020-01-01' AS DATE) + CAST('23:59:59' AS TIME) AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01', CAST('23:59:59' AS TIME))",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')",
+ write={
+ "duckdb": "SELECT CAST(CAST('2020-01-01' AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')",
+ },
+ )
+ self.validate_all(
+ "SELECT LENGTH(foo)",
+ read={
+ "bigquery": "SELECT LENGTH(foo)",
+ "snowflake": "SELECT LENGTH(foo)",
+ },
+ write={
+ "duckdb": "SELECT CASE TYPEOF(foo) WHEN 'VARCHAR' THEN LENGTH(CAST(foo AS TEXT)) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) END",
+ "snowflake": "SELECT LENGTH(foo)",
+ "": "SELECT LENGTH(foo)",
+ },
+ )
+ self.validate_all(
+ "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
+ write={
+ "duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))",
+ "bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
+ },
+ )
+ self.validate_all(
+ "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
+ write={
+ "bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
+ "duckdb": "ARRAY_CONCAT([1, 2], ARRAY_CONCAT([3, 4], [5, 6]))",
+ "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))",
+ "redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))",
+ "snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))",
+ "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
+ "trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
+ write={
+ "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
+ "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
+ write={
+ "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])",
+ "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
+ write={
+ "duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)",
+ "bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
+ },
+ )
+ self.validate_all(
+ "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
+ write={
+ "bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
+ "duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ write={
+ "bigquery": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ "duckdb": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ },
+ )
+ self.validate_all(
+ "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text",
+ write={
+ "bigquery": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text",
+ "duckdb": "SELECT ARRAY_TO_STRING(LIST_TRANSFORM(['cake', 'pie', NULL], x -> COALESCE(x, 'MISSING')), '--') AS text",
+ },
+ )
+
+ self.validate_identity("SELECT * FROM a-b c", "SELECT * FROM a-b AS c")
def test_errors(self):
with self.assertRaises(TokenError):
@@ -1372,6 +1532,12 @@ WHERE
transpile("DATE_ADD(x, day)", read="bigquery")
def test_warnings(self):
+ with self.assertLogs(parser_logger) as cm:
+ self.validate_identity(
+ "/* some comment */ DECLARE foo DATE DEFAULT DATE_SUB(current_date, INTERVAL 2 day)"
+ )
+ self.assertIn("contains unsupported syntax", cm.output[0])
+
with self.assertLogs(helper_logger) as cm:
self.validate_identity(
"WITH cte(c) AS (SELECT * FROM t) SELECT * FROM cte",
@@ -1552,7 +1718,7 @@ WHERE
"SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'null')], origin => CAST('2023-11-01 09:30:01' AS DATETIME)) ORDER BY time"
)
self.validate_identity(
- "SELECT * FROM GAP_FILL(TABLE (SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(2, CAST('2023-11-01 09:36:00' AS DATETIME), 77, 'ACTIVE'), STRUCT(3, CAST('2023-11-01 09:37:00' AS DATETIME), 78, 'ACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')])), ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'linear')]) ORDER BY time"
+ "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'locf')]) ORDER BY time"
)
def test_models(self):
@@ -1702,3 +1868,120 @@ OPTIONS (
"MOD((a + 1), b)",
"MOD(a + 1, b)",
)
+
+ def test_inline_constructor(self):
+ self.validate_identity(
+ """SELECT STRUCT<ARRAY<STRING>>(["2023-01-17"])""",
+ """SELECT CAST(STRUCT(['2023-01-17']) AS STRUCT<ARRAY<STRING>>)""",
+ )
+ self.validate_identity(
+ """SELECT STRUCT<STRING>((SELECT 'foo')).*""",
+ """SELECT CAST(STRUCT((SELECT 'foo')) AS STRUCT<STRING>).*""",
+ )
+
+ self.validate_all(
+ "SELECT ARRAY<INT>[1, 2, 3]",
+ write={
+ "bigquery": "SELECT CAST([1, 2, 3] AS ARRAY<INT64>)",
+ "duckdb": "SELECT CAST([1, 2, 3] AS INT[])",
+ },
+ )
+ self.validate_all(
+ "CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)",
+ write={
+ "bigquery": "CAST(CAST(STRUCT(1) AS STRUCT<a INT64>) AS STRUCT<a INT64>)",
+ "duckdb": "CAST(CAST(ROW(1) AS STRUCT(a BIGINT)) AS STRUCT(a BIGINT))",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY<STRUCT<x INT64>>))",
+ "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>))",
+ "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT STRUCT<a INT64, b STRUCT<c STRING>>(1, STRUCT('c_str'))",
+ write={
+ "bigquery": "SELECT CAST(STRUCT(1, STRUCT('c_str')) AS STRUCT<a INT64, b STRUCT<c STRING>>)",
+ "duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))",
+ },
+ )
+
+ def test_convert(self):
+ for value, expected in [
+ (datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"),
+ (datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"),
+ (
+ datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc),
+ "CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)",
+ ),
+ (
+ pytz.timezone("America/Los_Angeles").localize(
+ datetime.datetime(2023, 1, 1, 12, 13, 14)
+ ),
+ "CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ ),
+ ]:
+ with self.subTest(value):
+ self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected)
+
+ def test_unnest(self):
+ self.validate_all(
+ "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
+ write={
+ "bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
+ "duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
+ write={
+ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
+ "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ write={
+ "bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ "duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
+ },
+ )
+ self.validate_all(
+ "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ write={
+ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
+ },
+ )
+
+ def test_range_type(self):
+ for type, value in (
+ ("RANGE<DATE>", "'[2020-01-01, 2020-12-31)'"),
+ ("RANGE<DATE>", "'[UNBOUNDED, 2020-12-31)'"),
+ ("RANGE<DATETIME>", "'[2020-01-01 12:00:00, 2020-12-31 12:00:00)'"),
+ ("RANGE<TIMESTAMP>", "'[2020-10-01 12:00:00+08, 2020-12-31 12:00:00+08)'"),
+ ):
+ with self.subTest(f"Testing BigQuery's RANGE<T> type: {type} {value}"):
+ self.validate_identity(f"SELECT {type} {value}", f"SELECT CAST({value} AS {type})")
+
+ self.assertEqual(self.parse_one(type), exp.DataType.build(type, dialect="bigquery"))
+
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-12-01' AS DATE), CAST('2022-12-31' AS DATE))"
+ )
+ self.validate_identity("SELECT RANGE(NULL, CAST('2022-12-31' AS DATE))")
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-10-01 14:53:27' AS DATETIME), CAST('2022-10-01 16:00:00' AS DATETIME))"
+ )
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-10-01 14:53:27 America/Los_Angeles' AS TIMESTAMP), CAST('2022-10-01 16:00:00 America/Los_Angeles' AS TIMESTAMP))"
+ )
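
The inline-constructor tests above rewrite BigQuery's typed ARRAY/STRUCT literals into explicit CASTs so they survive transpilation. A quick sketch of that transform through the public API (assuming sqlglot is installed; expected outputs mirror the tests above):

import sqlglot

print(sqlglot.transpile("SELECT ARRAY<INT>[1, 2, 3]", read="bigquery", write="bigquery")[0])
# SELECT CAST([1, 2, 3] AS ARRAY<INT64>)

print(sqlglot.transpile("SELECT ARRAY<INT>[1, 2, 3]", read="bigquery", write="duckdb")[0])
# SELECT CAST([1, 2, 3] AS INT[])
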
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index 72634a8..b4fc587 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -1,4 +1,7 @@
+from datetime import date
from sqlglot import exp, parse_one
+from sqlglot.dialects import ClickHouse
+from sqlglot.expressions import convert
from tests.dialects.test_dialect import Validator
from sqlglot.errors import ErrorLevel
@@ -7,41 +10,33 @@ class TestClickhouse(Validator):
dialect = "clickhouse"
def test_clickhouse(self):
- self.validate_all(
- "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
- write={
- "": "SELECT * FROM x WHERE z = 2",
- "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
- },
- )
- self.validate_all(
- "SELECT * FROM x AS prewhere",
- read={
- "clickhouse": "SELECT * FROM x AS prewhere",
- "duckdb": "SELECT * FROM x prewhere",
- },
- )
-
- self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
+ for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING:
+ self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)")
- string_types = [
- "BLOB",
- "LONGBLOB",
- "LONGTEXT",
- "MEDIUMBLOB",
- "MEDIUMTEXT",
- "TINYBLOB",
- "TINYTEXT",
- "VARCHAR(255)",
- ]
+ # Arrays, maps and tuples can't be Nullable in ClickHouse
+ for non_nullable_type in ("ARRAY<INT>", "MAP<INT, INT>", "STRUCT(a: INT)"):
+ try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})")
+ target_type = try_cast.to.sql("clickhouse")
+ self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})")
- for string_type in string_types:
- self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)")
+ for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"):
+ try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})")
+ target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse")
+ self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))")
expr = parse_one("count(x)")
self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
self.assertIsNone(expr._meta)
+ self.validate_identity("SELECT toString(CHAR(104.1, 101, 108.9, 108.9, 111, 32))")
+ self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var)
+ self.validate_identity("SELECT toFloat(like)")
+ self.validate_identity("SELECT like")
+ self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)")
+ self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')")
+ self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))")
+ self.validate_identity("extract(haystack, pattern)")
+ self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
self.validate_identity("SELECT CAST(x AS Tuple(String, Array(Nullable(Float64))))")
self.validate_identity("countIf(x, y)")
self.validate_identity("x = y")
@@ -49,7 +44,7 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)")
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)")
self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000")
- self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))")
+ self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))")
self.validate_identity("CAST(x AS Enum('hello', 'world'))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))")
@@ -88,23 +83,21 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)")
self.validate_identity("position(haystack, needle)")
self.validate_identity("position(haystack, needle, position)")
- self.validate_identity("CAST(x AS DATETIME)")
+ self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)")
+ self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src")
self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""")
self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b")
self.validate_identity("SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b")
+ self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
+ self.validate_identity("TRUNCATE DATABASE db")
+ self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
self.validate_identity(
- "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table"
- )
-
- self.validate_identity(
- "SELECT $1$foo$1$",
- "SELECT 'foo'",
+ "SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) WINDOW window_name AS (PARTITION BY number) QUALIFY partition_count = 4 ORDER BY number"
)
self.validate_identity(
- "SELECT * FROM table LIMIT 1, 2 BY a, b",
- "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b",
+ "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table"
)
self.validate_identity(
"SELECT * FROM table LIMIT 1 BY CONCAT(datalayerVariantNo, datalayerProductId, warehouse)"
@@ -134,10 +127,6 @@ class TestClickhouse(Validator):
"SELECT sum(1) AS impressions, (arrayJoin(arrayZip(cities, browsers)) AS t).1 AS city, t.2 AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3"
)
self.validate_identity(
- "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']",
- "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')",
- )
- self.validate_identity(
'SELECT CAST(tuple(1 AS "a", 2 AS "b", 3.0 AS "c").2 AS Nullable(String))'
)
self.validate_identity(
@@ -150,17 +139,80 @@ class TestClickhouse(Validator):
"CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple() AS SELECT * FROM test_data"
)
self.validate_identity(
- "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) TO table1 AS SELECT * FROM test_data"
+ "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 TO table1 AS SELECT * FROM test_data"
)
self.validate_identity(
- "CREATE MATERIALIZED VIEW test_view (id UInt8) TO db.table1 AS SELECT * FROM test_data"
+ "CREATE MATERIALIZED VIEW test_view TO db.table1 (id UInt8) AS SELECT * FROM test_data"
)
- self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
- self.validate_identity("TRUNCATE DATABASE db")
- self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
self.validate_identity(
"CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)"
)
+ self.validate_identity(
+ "SELECT $1$foo$1$",
+ "SELECT 'foo'",
+ )
+ self.validate_identity(
+ "SELECT * FROM table LIMIT 1, 2 BY a, b",
+ "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b",
+ )
+ self.validate_identity(
+ "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']",
+ "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')",
+ )
+
+ self.validate_all(
+ "CHAR(67) || CHAR(65) || CHAR(84)",
+ read={
+ "clickhouse": "CHAR(67) || CHAR(65) || CHAR(84)",
+ "oracle": "CHR(67) || CHR(65) || CHR(84)",
+ },
+ )
+ self.validate_all(
+ "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ read={
+ "clickhouse": "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ "oracle": "SELECT LAG(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ },
+ )
+ self.validate_all(
+ "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ read={
+ "clickhouse": "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ "oracle": "SELECT LEAD(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ read={
+ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ "postgres": "SELECT TO_DATE('05 12 2000', 'DD MM YYYY')",
+ },
+ write={
+ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ "postgres": "SELECT CAST(CAST(TO_DATE('05 12 2000', 'DD MM YYYY') AS TIMESTAMP) AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
+ write={
+ "": "SELECT * FROM x WHERE z = 2",
+ "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x AS prewhere",
+ read={
+ "clickhouse": "SELECT * FROM x AS prewhere",
+ "duckdb": "SELECT * FROM x prewhere",
+ },
+ )
+ self.validate_all(
+ "SELECT a, b FROM (SELECT * FROM x) AS t",
+ read={
+ "clickhouse": "SELECT a, b FROM (SELECT * FROM x) AS t",
+ "duckdb": "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)",
+ },
+ )
self.validate_all(
"SELECT arrayJoin([1,2,3])",
write={
@@ -181,11 +233,16 @@ class TestClickhouse(Validator):
},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND",
+ "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
read={
"duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
"postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
},
+ write={
+ "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
+ "duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND",
+ "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'",
+ },
)
self.validate_all(
"SELECT CURRENT_DATE()",
@@ -413,15 +470,15 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT FORMAT")
self.validate_identity("1 AS FORMAT").assert_is(exp.Alias)
- self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d', '%T')")
+ self.validate_identity("SELECT formatDateTime(NOW(), '%Y-%m-%d', '%T')")
self.validate_all(
- "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+ "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
read={
"clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
"mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
},
write={
- "clickhouse": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+ "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
"mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
},
)
@@ -440,6 +497,66 @@ class TestClickhouse(Validator):
)
self.validate_identity("ALTER TABLE visits REPLACE PARTITION ID '201901' FROM visits_tmp")
self.validate_identity("ALTER TABLE visits ON CLUSTER test_cluster DROP COLUMN col1")
+ self.validate_identity("DELETE FROM tbl ON CLUSTER test_cluster WHERE date = '2019-01-01'")
+
+ self.assertIsInstance(
+ parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType
+ )
+
+ self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
+ self.validate_all(
+ "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ read={
+            # looks like the VALUES table function, but should be parsed as a VALUES block
+ "clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)"
+ },
+ write={
+ "clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ },
+ )
+ self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')")
+ self.validate_identity("SELECT TRIM(LEADING '(' FROM '( Hello, world! )')")
+ self.validate_identity("current_timestamp").assert_is(exp.Column)
+
+ self.validate_identity("SELECT * APPLY(sum) FROM columns_transformers")
+ self.validate_identity("SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers")
+ self.validate_identity(
+ "SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers"
+ )
+ self.validate_identity("SELECT * APPLY(sum), COLUMNS('col') APPLY(sum) APPLY(avg) FROM t")
+ self.validate_identity(
+ "SELECT * FROM ABC WHERE hasAny(COLUMNS('.*field') APPLY(toUInt64) APPLY(to), (SELECT groupUniqArray(toUInt64(field))))"
+ )
+ self.validate_identity("SELECT col apply", "SELECT col AS apply")
+
+ def test_clickhouse_values(self):
+ values = exp.select("*").from_(
+ exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"])
+ )
+ self.assertEqual(
+ values.sql("clickhouse"),
+ "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq",
+ )
+
+ self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
+ self.validate_identity(
+ "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)",
+ "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ )
+
+ self.validate_all(
+ "SELECT col FROM (SELECT 1 AS col) AS _t",
+ read={
+ "duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)",
+ },
+ )
+ self.validate_all(
+ "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t",
+ read={
+ "duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)",
+ },
+ )
def test_cte(self):
self.validate_identity("WITH 'x' AS foo SELECT foo")
@@ -501,7 +618,7 @@ class TestClickhouse(Validator):
self.validate_all(
"SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
write={
- "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
+ "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
"": "SELECT :abc, :b, :c, :d, :e",
},
)
@@ -532,14 +649,87 @@ class TestClickhouse(Validator):
)
def test_ddl(self):
+ db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
+ create_with_cluster = exp.Create(
+ this=db_table_expr,
+ kind="DATABASE",
+ properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
+ )
+ self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
+
+ # Transpiled CREATE SCHEMA may have OnCluster property set
+ create_with_cluster = exp.Create(
+ this=db_table_expr,
+ kind="SCHEMA",
+ properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
+ )
+ self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
+
+ ctas_with_comment = exp.Create(
+ this=exp.table_("foo"),
+ kind="TABLE",
+ expression=exp.select("*").from_("db.other_table"),
+ properties=exp.Properties(
+ expressions=[
+ exp.EngineProperty(this=exp.var("Memory")),
+ exp.SchemaCommentProperty(this=exp.Literal.string("foo")),
+ ],
+ ),
+ )
+ self.assertEqual(
+ ctas_with_comment.sql("clickhouse"),
+ "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'",
+ )
+
+ self.validate_identity("CREATE MATERIALIZED VIEW a.b TO a.c (c Int32) AS SELECT * FROM a.d")
+ self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
+ self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
+ self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))")
+ self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""")
+ self.validate_identity(
+ "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
+ )
+ self.validate_identity(
+ "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
+ )
+ self.validate_identity(
+ """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'"""
+ )
+ self.validate_identity(
+ "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3"
+ )
self.validate_identity(
'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)'
)
self.validate_identity(
"CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'"
)
+ self.validate_identity(
+ "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
+ "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
+ )
self.validate_all(
+ "CREATE DATABASE x",
+ read={
+ "duckdb": "CREATE SCHEMA x",
+ },
+ write={
+ "clickhouse": "CREATE DATABASE x",
+ "duckdb": "CREATE SCHEMA x",
+ },
+ )
+ self.validate_all(
+ "DROP DATABASE x",
+ read={
+ "duckdb": "DROP SCHEMA x",
+ },
+ write={
+ "clickhouse": "DROP DATABASE x",
+ "duckdb": "DROP SCHEMA x",
+ },
+ )
+ self.validate_all(
"""
CREATE TABLE example1 (
timestamp DateTime,
@@ -552,7 +742,7 @@ class TestClickhouse(Validator):
""",
write={
"clickhouse": """CREATE TABLE example1 (
- timestamp DATETIME,
+ timestamp DateTime,
x UInt32 TTL now() + INTERVAL '1' MONTH,
y String TTL timestamp + INTERVAL '1' DAY,
z String
@@ -630,7 +820,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE example_table (
- d DATETIME,
+ d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -657,7 +847,7 @@ TTL
""",
write={
"clickhouse": """CREATE TABLE table_with_where (
- d DATETIME,
+ d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -685,7 +875,7 @@ WHERE
""",
write={
"clickhouse": """CREATE TABLE table_for_recompression (
- d DATETIME,
+ d DateTime,
key UInt64,
value String
)
@@ -717,7 +907,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE table_for_aggregation (
- d DATETIME,
+ d DateTime,
k1 Int32,
k2 Int32,
x Int32,
@@ -824,8 +1014,6 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
- self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
- self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
self.validate_all(
"""
CREATE TABLE t (
@@ -842,11 +1030,11 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
- self.validate_identity(
- "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
- )
- self.validate_identity(
- "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
+
+ self.assertIsNotNone(
+ self.validate_identity("CREATE TABLE t1 (a String MATERIALIZED func())").find(
+ exp.ColumnConstraint
+ )
)
def test_agg_functions(self):
@@ -880,3 +1068,35 @@ LIFETIME(MIN 0 MAX 0)""",
for creatable in ("DATABASE", "TABLE", "VIEW", "DICTIONARY", "FUNCTION"):
with self.subTest(f"Test DROP {creatable} ON CLUSTER"):
self.validate_identity(f"DROP {creatable} test ON CLUSTER test_cluster")
+
+ def test_datetime_funcs(self):
+        # Each datetime func has an alias that is roundtripped to the original name, e.g. (DATE_SUB, DATESUB) -> DATE_SUB
+ datetime_funcs = (("DATE_SUB", "DATESUB"), ("DATE_ADD", "DATEADD"))
+
+ # 2-arg functions of type <func>(date, unit)
+ for func in (*datetime_funcs, ("TIMESTAMP_ADD", "TIMESTAMPADD")):
+ func_name = func[0]
+ for func_alias in func:
+ self.validate_identity(
+ f"""SELECT {func_alias}(date, INTERVAL '3' YEAR)""",
+ f"""SELECT {func_name}(date, INTERVAL '3' YEAR)""",
+ )
+
+ # 3-arg functions of type <func>(unit, value, date)
+ for func in (*datetime_funcs, ("DATE_DIFF", "DATEDIFF"), ("TIMESTAMP_SUB", "TIMESTAMPSUB")):
+ func_name = func[0]
+ for func_alias in func:
+ with self.subTest(f"Test 3-arg date-time function {func_alias}"):
+ self.validate_identity(
+ f"SELECT {func_alias}(SECOND, 1, bar)",
+ f"SELECT {func_name}(SECOND, 1, bar)",
+ )
+
+ def test_convert(self):
+ self.assertEqual(
+ convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')"
+ )
+
+ def test_grant(self):
+ self.validate_identity("GRANT SELECT(x, y) ON db.table TO john WITH GRANT OPTION")
+ self.validate_identity("GRANT INSERT(x, y) ON db.table TO john")
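
The ClickHouse tests above encode the rule that TRY_CAST is rendered as a plain CAST wrapped in Nullable(...) for scalar types, while container types (arrays, maps, tuples) cannot be Nullable and are left unwrapped. A minimal sketch of that rule (assuming sqlglot is installed; the exact type names, e.g. Int32, follow ClickHouse's type mapping):

from sqlglot import parse_one

# Scalar target type: wrapped in Nullable.
print(parse_one("TRY_CAST(x AS INT)").sql("clickhouse"))
# e.g. CAST(x AS Nullable(Int32))

# Container target type: arrays/maps/tuples can't be Nullable, so no wrapper is added.
print(parse_one("TRY_CAST(x AS ARRAY<INT>)").sql("clickhouse"))
# e.g. CAST(x AS Array(Int32))
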
diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py
index 9ef3b86..65e8d5d 100644
--- a/tests/dialects/test_databricks.py
+++ b/tests/dialects/test_databricks.py
@@ -1,4 +1,4 @@
-from sqlglot import transpile
+from sqlglot import exp, transpile
from sqlglot.errors import ParseError
from tests.dialects.test_dialect import Validator
@@ -7,6 +7,7 @@ class TestDatabricks(Validator):
dialect = "databricks"
def test_databricks(self):
+ self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT")
self.validate_identity("DESCRIBE HISTORY a.b")
self.validate_identity("DESCRIBE history.tbl")
self.validate_identity("CREATE TABLE t (a STRUCT<c: MAP<STRING, STRING>>)")
@@ -25,6 +26,7 @@ class TestDatabricks(Validator):
self.validate_identity("CREATE FUNCTION a AS b")
self.validate_identity("SELECT ${x} FROM ${y} WHERE ${z} > 1")
self.validate_identity("CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))")
+ self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)")
self.validate_identity(
"CREATE TABLE IF NOT EXISTS db.table (a TIMESTAMP, b BOOLEAN GENERATED ALWAYS AS (NOT a IS NULL)) USING DELTA"
)
@@ -37,22 +39,26 @@ class TestDatabricks(Validator):
self.validate_identity(
"SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"
)
-
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$"
)
-
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$"
)
-
- self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)")
self.validate_identity(
"TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', city LIKE 'LA')"
)
self.validate_identity(
"COPY INTO target FROM `s3://link` FILEFORMAT = AVRO VALIDATE = ALL FILES = ('file1', 'file2') FORMAT_OPTIONS ('opt1'='true', 'opt2'='test') COPY_OPTIONS ('mergeSchema'='true')"
)
+ self.validate_identity(
+ "DATE_DIFF(day, created_at, current_date())",
+ "DATEDIFF(DAY, created_at, CURRENT_DATE)",
+ ).args["unit"].assert_is(exp.Var)
+ self.validate_identity(
+ r'SELECT r"\\foo.bar\"',
+ r"SELECT '\\\\foo.bar\\'",
+ )
self.validate_all(
"CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
@@ -67,7 +73,6 @@ class TestDatabricks(Validator):
"teradata": "CREATE TABLE t1 AS (SELECT c FROM t2) WITH DATA",
},
)
-
self.validate_all(
"SELECT X'1A2B'",
read={
@@ -94,35 +99,55 @@ class TestDatabricks(Validator):
read="databricks",
)
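+        # Assumed behavior per the expected output below: DuckDB macros take untyped parameters,
+        # so the BIGINT parameter and return types are dropped when transpiling to DuckDB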
+ self.validate_all(
+ "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a",
+ write={
+ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a",
+ "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS TABLE SELECT a",
+ },
+ )
+
+ self.validate_all(
+ "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a",
+ write={
+ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a",
+ "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS a",
+ },
+ )
+
# https://docs.databricks.com/sql/language-manual/functions/colonsign.html
def test_json(self):
+ self.validate_identity("SELECT c1:price, c1:price.foo, c1:price.bar[1]")
self.validate_identity(
- """SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:item[1].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:['price'] FROM VALUES('{ "price": 5 }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:item[*].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:item[1].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.item[1].price') FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT FROM_JSON(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:item[*].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.item[*].price') FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT INLINE(FROM_JSON(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT from_json(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT FROM_JSON(GET_JSON_OBJECT(c1, '$.item[*].price'), 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT c1:['price'] FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
)
self.validate_identity(
- """SELECT inline(from_json(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT INLINE(FROM_JSON(GET_JSON_OBJECT(c1, '$.item[*]'), 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
)
self.validate_identity(
- "SELECT c1 : price",
- "SELECT GET_JSON_OBJECT(c1, '$.price')",
+ """SELECT raw:`zip code`, raw:`fb:testid`, raw:store['bicycle'], raw:store["zip code"]""",
+ """SELECT raw:["zip code"], raw:["fb:testid"], raw:store.bicycle, raw:store["zip code"]""",
+ )
+ self.validate_all(
+ "SELECT col:`fr'uit`",
+ write={
+ "databricks": """SELECT col:["fr'uit"]""",
+ "postgres": "SELECT JSON_EXTRACT_PATH(col, 'fr''uit')",
+ },
)
def test_datediff(self):
@@ -238,3 +263,17 @@ class TestDatabricks(Validator):
"databricks": "WITH x AS (SELECT 1) SELECT * FROM x",
},
)
+
+ def test_streaming_tables(self):
+ self.validate_identity(
+ "CREATE STREAMING TABLE raw_data AS SELECT * FROM STREAM READ_FILES('abfss://container@storageAccount.dfs.core.windows.net/base/path')"
+ )
+ self.validate_identity(
+ "CREATE OR REFRESH STREAMING TABLE csv_data (id INT, ts TIMESTAMP, event STRING) AS SELECT * FROM STREAM READ_FILES('s3://bucket/path', format => 'csv', schema => 'id int, ts timestamp, event string')"
+ )
+
+ def test_grant(self):
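+        # Grantees in these examples include backquoted user emails, plain group names and UUID-style principal IDs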
+ self.validate_identity("GRANT CREATE ON SCHEMA my_schema TO `alf@melmak.et`")
+ self.validate_identity("GRANT SELECT ON TABLE sample_data TO `alf@melmak.et`")
+ self.validate_identity("GRANT ALL PRIVILEGES ON TABLE forecasts TO finance")
+ self.validate_identity("GRANT SELECT ON TABLE t TO `fab9e00e-ca35-11ec-9d64-0242ac120002`")
diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py
index aaeb7b0..96ce600 100644
--- a/tests/dialects/test_dialect.py
+++ b/tests/dialects/test_dialect.py
@@ -20,7 +20,9 @@ class Validator(unittest.TestCase):
def parse_one(self, sql, **kwargs):
return parse_one(sql, read=self.dialect, **kwargs)
- def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False):
+ def validate_identity(
+ self, sql, write_sql=None, pretty=False, check_command_warning=False, identify=False
+ ):
if check_command_warning:
with self.assertLogs(parser_logger) as cm:
expression = self.parse_one(sql)
@@ -28,7 +30,9 @@ class Validator(unittest.TestCase):
else:
expression = self.parse_one(sql)
- self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty))
+ self.assertEqual(
+ write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty, identify=identify)
+ )
return expression
def validate_all(self, sql, read=None, write=None, pretty=False, identify=False):
@@ -102,14 +106,10 @@ class TestDialect(Validator):
lowercase_mysql = Dialect.get_or_raise("mysql, normalization_strategy = lowercase")
self.assertEqual(lowercase_mysql.normalization_strategy.value, "LOWERCASE")
- with self.assertRaises(ValueError) as cm:
+ with self.assertRaises(AttributeError) as cm:
Dialect.get_or_raise("mysql, normalization_strategy")
- self.assertEqual(
- str(cm.exception),
- "Invalid dialect format: 'mysql, normalization_strategy'. "
- "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'.",
- )
+ self.assertEqual(str(cm.exception), "'bool' object has no attribute 'upper'")
with self.assertRaises(ValueError) as cm:
Dialect.get_or_raise("myqsl")
@@ -121,6 +121,18 @@ class TestDialect(Validator):
self.assertEqual(str(cm.exception), "Unknown dialect 'asdfjasodiufjsd'.")
+ oracle_with_settings = Dialect.get_or_raise(
+ "oracle, normalization_strategy = lowercase, version = 19.5"
+ )
+ self.assertEqual(oracle_with_settings.normalization_strategy.value, "LOWERCASE")
+ self.assertEqual(oracle_with_settings.settings, {"version": "19.5"})
+
+ bool_settings = Dialect.get_or_raise("oracle, s1=TruE, s2=1, s3=FaLse, s4=0, s5=nonbool")
+ self.assertEqual(
+ bool_settings.settings,
+ {"s1": True, "s2": True, "s3": False, "s4": False, "s5": "nonbool"},
+ )
+
def test_compare_dialects(self):
bigquery_class = Dialect["bigquery"]
bigquery_object = BigQuery()
@@ -152,7 +164,7 @@ class TestDialect(Validator):
"CAST(a AS TEXT)",
write={
"bigquery": "CAST(a AS STRING)",
- "clickhouse": "CAST(a AS String)",
+ "clickhouse": "CAST(a AS Nullable(String))",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"materialize": "CAST(a AS TEXT)",
@@ -173,7 +185,7 @@ class TestDialect(Validator):
"CAST(a AS BINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
- "clickhouse": "CAST(a AS BINARY(4))",
+ "clickhouse": "CAST(a AS Nullable(BINARY(4)))",
"drill": "CAST(a AS VARBINARY(4))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
@@ -193,7 +205,7 @@ class TestDialect(Validator):
"CAST(a AS VARBINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
- "clickhouse": "CAST(a AS String)",
+ "clickhouse": "CAST(a AS Nullable(String))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
"mysql": "CAST(a AS VARBINARY(4))",
@@ -211,19 +223,19 @@ class TestDialect(Validator):
self.validate_all(
"CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
write={
- "clickhouse": "CAST(map('a', '1') AS Map(String, String))",
+ "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))",
},
)
self.validate_all(
"CAST(ARRAY(1, 2) AS ARRAY<TINYINT>)",
write={
- "clickhouse": "CAST([1, 2] AS Array(Int8))",
+ "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))",
},
)
self.validate_all(
- "CAST((1, 2) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
+ "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
write={
- "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))",
+ "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))",
},
)
self.validate_all(
@@ -320,19 +332,9 @@ class TestDialect(Validator):
"redshift": "CAST(a AS DOUBLE PRECISION)",
},
write={
- "duckdb": "CAST(a AS DOUBLE)",
- "drill": "CAST(a AS DOUBLE)",
- "postgres": "CAST(a AS DOUBLE PRECISION)",
- "redshift": "CAST(a AS DOUBLE PRECISION)",
- "doris": "CAST(a AS DOUBLE)",
- },
- )
-
- self.validate_all(
- "CAST(a AS DOUBLE)",
- write={
"bigquery": "CAST(a AS FLOAT64)",
- "clickhouse": "CAST(a AS Float64)",
+ "clickhouse": "CAST(a AS Nullable(Float64))",
+ "doris": "CAST(a AS DOUBLE)",
"drill": "CAST(a AS DOUBLE)",
"duckdb": "CAST(a AS DOUBLE)",
"materialize": "CAST(a AS DOUBLE PRECISION)",
@@ -584,7 +586,7 @@ class TestDialect(Validator):
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)",
"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')",
"drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')",
- "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')",
+ "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')",
"spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')",
},
)
@@ -620,7 +622,7 @@ class TestDialect(Validator):
write={
"duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))",
"hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')",
- "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(CAST('2020-01-01' AS VARCHAR), 'yyyy-MM-dd')))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d'), 'yyyy-MM-dd')))",
"starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
"doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
},
@@ -639,8 +641,18 @@ class TestDialect(Validator):
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01')",
write={
- "drill": "CAST('2020-01-01' AS TIMESTAMP)",
+ "bigquery": "CAST('2020-01-01' AS DATETIME)",
+ "databricks": "CAST('2020-01-01' AS TIMESTAMP)",
"duckdb": "CAST('2020-01-01' AS TIMESTAMP)",
+ "tsql": "CAST('2020-01-01' AS DATETIME2)",
+ "mysql": "CAST('2020-01-01' AS DATETIME)",
+ "postgres": "CAST('2020-01-01' AS TIMESTAMP)",
+ "redshift": "CAST('2020-01-01' AS TIMESTAMP)",
+ "snowflake": "CAST('2020-01-01' AS TIMESTAMP)",
+ "spark": "CAST('2020-01-01' AS TIMESTAMP)",
+ "trino": "CAST('2020-01-01' AS TIMESTAMP)",
+ "clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))",
+ "drill": "CAST('2020-01-01' AS TIMESTAMP)",
"hive": "CAST('2020-01-01' AS TIMESTAMP)",
"presto": "CAST('2020-01-01' AS TIMESTAMP)",
"sqlite": "'2020-01-01'",
@@ -648,6 +660,64 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')",
+ write={
+ "mysql": "CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))",
+ "trino": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP(6))",
+ "presto": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14.123-08:00', 'America/Los_Angeles')",
+ write={
+ "mysql": "TIMESTAMP('2020-01-01 12:13:14.123-08:00')",
+ "trino": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP(3) WITH TIME ZONE)",
+ "presto": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')",
+ write={
+ "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
+ "mysql": "TIMESTAMP('2020-01-01 12:13:14-08:00')",
+ "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))",
+ "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "sqlite": "'2020-01-01 12:13:14-08:00'",
+ "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME(col, 'America/Los_Angeles')",
+ write={
+ "bigquery": "CAST(col AS TIMESTAMP)",
+ "databricks": "CAST(col AS TIMESTAMP)",
+ "duckdb": "CAST(col AS TIMESTAMPTZ)",
+ "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
+ "mysql": "TIMESTAMP(col)",
+ "postgres": "CAST(col AS TIMESTAMPTZ)",
+ "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "snowflake": "CAST(col AS TIMESTAMPTZ)",
+ "spark": "CAST(col AS TIMESTAMP)",
+ "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))",
+ "drill": "CAST(col AS TIMESTAMP)",
+ "hive": "CAST(col AS TIMESTAMP)",
+ "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "sqlite": "col",
+ "doris": "CAST(col AS DATETIME)",
+ },
+ )
+ self.validate_all(
"TIME_STR_TO_UNIX('2020-01-01')",
write={
"duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))",
@@ -673,6 +743,13 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
+ write={
+ "redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')",
+ "tsql": "FORMAT(a, 'yyyy-MM-dd HH:mm:ss.ffffff')",
+ },
+ )
+ self.validate_all(
"TIME_TO_TIME_STR(x)",
write={
"drill": "CAST(x AS VARCHAR)",
@@ -1150,13 +1227,19 @@ class TestDialect(Validator):
write={
"bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
- "oracle": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
"hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
},
)
+ order_by_all_sql = "SELECT * FROM t ORDER BY ALL"
+ self.validate_identity(order_by_all_sql).find(exp.Ordered).this.assert_is(exp.Column)
+
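+        # Dialects that support ORDER BY ALL parse ALL as a keyword (exp.Var) rather than as a column reference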
+ for dialect in ("duckdb", "spark", "databricks"):
+ with self.subTest(f"Testing ORDER BY ALL in {dialect}"):
+ parse_one(order_by_all_sql, read=dialect).find(exp.Ordered).this.assert_is(exp.Var)
+
def test_json(self):
self.validate_all(
"""JSON_EXTRACT(x, '$["a b"]')""",
@@ -1329,6 +1412,13 @@ class TestDialect(Validator):
},
)
+ for dialect in ("duckdb", "starrocks"):
+ with self.subTest(f"Generating json extraction with digit-prefixed key ({dialect})"):
+ self.assertEqual(
+ parse_one("""select '{"0": "v"}' -> '0'""", read=dialect).sql(dialect=dialect),
+ """SELECT '{"0": "v"}' -> '0'""",
+ )
+
def test_cross_join(self):
self.validate_all(
"SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)",
@@ -1343,7 +1433,7 @@ class TestDialect(Validator):
write={
"drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
"presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
- "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b",
+ "spark": "SELECT a, b FROM x LATERAL VIEW INLINE(ARRAYS_ZIP(y, z)) t AS a, b",
},
)
self.validate_all(
@@ -1409,12 +1499,14 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
},
write={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
@@ -1424,12 +1516,14 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
},
write={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
@@ -1448,6 +1542,7 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
@@ -1457,6 +1552,7 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT ALL SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
@@ -1466,6 +1562,7 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
@@ -1475,6 +1572,7 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT ALL SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
@@ -1690,6 +1788,13 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "FILTER(the_array, x -> x > 0)",
+ write={
+ "presto": "FILTER(the_array, x -> x > 0)",
+ "starrocks": "ARRAY_FILTER(the_array, x -> x > 0)",
+ },
+ )
+ self.validate_all(
"a / b",
write={
"bigquery": "a / b",
@@ -2101,13 +2206,24 @@ SELECT
},
)
+        # needs to preserve the target alias in the WHEN condition but not in the THEN clause
+ self.validate_all(
+ """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a
+ WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO'
+ WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""",
+ write={
+ "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
+ "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
+ },
+ )
+
def test_substring(self):
self.validate_all(
"SUBSTR('123456', 2, 3)",
write={
- "bigquery": "SUBSTR('123456', 2, 3)",
+ "bigquery": "SUBSTRING('123456', 2, 3)",
"oracle": "SUBSTR('123456', 2, 3)",
- "postgres": "SUBSTR('123456', 2, 3)",
+ "postgres": "SUBSTRING('123456' FROM 2 FOR 3)",
},
)
self.validate_all(
@@ -2260,11 +2376,11 @@ SELECT
write={
"duckdb": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
"snowflake": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
- "clickhouse": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "clickhouse": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
"mysql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
"oracle": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1",
"postgres": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
- "tsql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "tsql": "SELECT * FROM (SELECT *, COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
},
)
self.validate_all(
@@ -2272,11 +2388,11 @@ SELECT
write={
"duckdb": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
"snowflake": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
- "clickhouse": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1',
+ "clickhouse": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
"mysql": "SELECT `user id`, some_id, other_id, `2 nd id` FROM (SELECT `user id`, some_id, 1 AS other_id, 2 AS `2 nd id`, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
"oracle": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1',
"postgres": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1',
- "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
},
)
@@ -2559,3 +2675,219 @@ FROM subquery2""",
"""SELECT partition.d FROM t PARTITION (d)""",
"""SELECT partition.d FROM t AS PARTITION(d)""",
)
+
+ def test_string_functions(self):
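+        # ClickHouse reads its LEFTPAD / RIGHTPAD aliases, but the canonical LPAD / RPAD names are written back out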
+ for pad_func in ("LPAD", "RPAD"):
+ ch_alias = "LEFTPAD" if pad_func == "LPAD" else "RIGHTPAD"
+ for fill_pattern in ("", ", ' '"):
+ with self.subTest(f"Testing {pad_func}() with pattern {fill_pattern}"):
+ self.validate_all(
+ f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ read={
+ "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "clickhouse": f"SELECT {ch_alias}('bar', 5{fill_pattern})",
+ },
+ write={
+ "": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "clickhouse": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "duckdb": f"SELECT {pad_func}('bar', 5, ' ')",
+ "mysql": f"SELECT {pad_func}('bar', 5, ' ')",
+ "hive": f"SELECT {pad_func}('bar', 5, ' ')",
+ "spark2": f"SELECT {pad_func}('bar', 5, ' ')",
+ "presto": f"SELECT {pad_func}('bar', 5, ' ')",
+ "trino": f"SELECT {pad_func}('bar', 5, ' ')",
+ },
+ )
+
+ def test_generate_date_array(self):
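+        # Dialects without a native date-sequence function (MySQL, Redshift, T-SQL here) fall back to a recursive CTE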
+ self.validate_all(
+ "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))",
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
+ "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series",
+ "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
+ "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))",
+ "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates",
+ },
+ )
+ self.validate_all(
+ "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ },
+ )
+ self.validate_all(
+ "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
+ "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
+ "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)",
+ "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates",
+ },
+ )
+
+ def test_set_operation_specifiers(self):
+ self.validate_all(
+ "SELECT 1 EXCEPT ALL SELECT 1",
+ write={
+ "": "SELECT 1 EXCEPT ALL SELECT 1",
+ "bigquery": UnsupportedError,
+ "clickhouse": "SELECT 1 EXCEPT SELECT 1",
+ "databricks": "SELECT 1 EXCEPT ALL SELECT 1",
+ "duckdb": "SELECT 1 EXCEPT ALL SELECT 1",
+ "mysql": "SELECT 1 EXCEPT ALL SELECT 1",
+ "oracle": "SELECT 1 EXCEPT ALL SELECT 1",
+ "postgres": "SELECT 1 EXCEPT ALL SELECT 1",
+ "presto": UnsupportedError,
+ "redshift": UnsupportedError,
+ "snowflake": UnsupportedError,
+ "spark": "SELECT 1 EXCEPT ALL SELECT 1",
+ "sqlite": UnsupportedError,
+ "starrocks": UnsupportedError,
+ "trino": UnsupportedError,
+ "tsql": UnsupportedError,
+ },
+ )
+
+ def test_normalize(self):
+ for form in ("", ", nfkc"):
+ with self.subTest(f"Testing NORMALIZE('str'{form}) roundtrip"):
+ self.validate_all(
+ f"SELECT NORMALIZE('str'{form})",
+ read={
+ "presto": f"SELECT NORMALIZE('str'{form})",
+ "trino": f"SELECT NORMALIZE('str'{form})",
+ "bigquery": f"SELECT NORMALIZE('str'{form})",
+ },
+ write={
+ "presto": f"SELECT NORMALIZE('str'{form})",
+ "trino": f"SELECT NORMALIZE('str'{form})",
+ "bigquery": f"SELECT NORMALIZE('str'{form})",
+ },
+ )
+
+ self.assertIsInstance(parse_one("NORMALIZE('str', NFD)").args.get("form"), exp.Var)
+
+ def test_coalesce(self):
+ """
+        Validate that "expressions" is a list for every exp.Coalesce instance; this is important
+        because some optimizer rules are coalesce-specific and iterate over "expressions".
+ """
+
+ # Check the 2-arg aliases
+ for func in ("COALESCE", "IFNULL", "NVL"):
+ self.assertIsInstance(self.parse_one(f"{func}(1, 2)").expressions, list)
+
+ # Check the varlen case
+ coalesce = self.parse_one("COALESCE(x, y, z)")
+ self.assertIsInstance(coalesce.expressions, list)
+ self.assertIsNone(coalesce.args.get("is_nvl"))
+
+ # Check Oracle's NVL which is decoupled from COALESCE
+ oracle_nvl = parse_one("NVL(x, y)", read="oracle")
+ self.assertIsInstance(oracle_nvl.expressions, list)
+ self.assertTrue(oracle_nvl.args.get("is_nvl"))
+
+ # Check T-SQL's ISNULL which is parsed into exp.Coalesce
+ self.assertIsInstance(parse_one("ISNULL(x, y)", read="tsql").expressions, list)
+
+ def test_trim(self):
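+        # Character-trimming variants: ClickHouse expresses LTRIM/RTRIM with a character argument
+        # as TRIM(LEADING ... FROM ...) / TRIM(TRAILING ... FROM ...)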
+ self.validate_all(
+ "TRIM('abc', 'a')",
+ read={
+ "bigquery": "TRIM('abc', 'a')",
+ "snowflake": "TRIM('abc', 'a')",
+ },
+ write={
+ "bigquery": "TRIM('abc', 'a')",
+ "snowflake": "TRIM('abc', 'a')",
+ },
+ )
+
+ self.validate_all(
+ "LTRIM('Hello World', 'H')",
+ read={
+ "oracle": "LTRIM('Hello World', 'H')",
+ "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')",
+ "snowflake": "LTRIM('Hello World', 'H')",
+ "bigquery": "LTRIM('Hello World', 'H')",
+ "": "LTRIM('Hello World', 'H')",
+ },
+ write={
+ "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')",
+ "oracle": "LTRIM('Hello World', 'H')",
+ "snowflake": "LTRIM('Hello World', 'H')",
+ "bigquery": "LTRIM('Hello World', 'H')",
+ },
+ )
+
+ self.validate_all(
+ "RTRIM('Hello World', 'd')",
+ read={
+ "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')",
+ "oracle": "RTRIM('Hello World', 'd')",
+ "snowflake": "RTRIM('Hello World', 'd')",
+ "bigquery": "RTRIM('Hello World', 'd')",
+ "": "RTRIM('Hello World', 'd')",
+ },
+ write={
+ "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')",
+ "oracle": "RTRIM('Hello World', 'd')",
+ "snowflake": "RTRIM('Hello World', 'd')",
+ "bigquery": "RTRIM('Hello World', 'd')",
+ },
+ )
+
+ def test_uuid(self):
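+        # UUID() maps to each dialect's generator, e.g. GEN_RANDOM_UUID (Postgres), GENERATE_UUID (BigQuery), UUID_STRING (Snowflake)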
+ self.validate_all(
+ "UUID()",
+ read={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "mysql": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ "snowflake": "UUID_STRING()",
+ },
+ write={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "mysql": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ "snowflake": "UUID_STRING()",
+ },
+ )
diff --git a/tests/dialects/test_doris.py b/tests/dialects/test_doris.py
index 8180d05..99076ba 100644
--- a/tests/dialects/test_doris.py
+++ b/tests/dialects/test_doris.py
@@ -56,6 +56,34 @@ class TestDoris(Validator):
"postgres": "SELECT STRING_AGG('aa', ',')",
},
)
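+        # Doris LAG/LEAD take an explicit default argument, so the 2-arg Postgres forms gain a trailing NULL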
+ self.validate_all(
+ "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LAG(1) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LAG(1, 2) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LEAD(1) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LEAD(1, 2) OVER (ORDER BY 1)",
+ },
+ )
def test_identity(self):
self.validate_identity("COALECSE(a, b, c, d)")
diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py
index 2bde478..e4788ec 100644
--- a/tests/dialects/test_duckdb.py
+++ b/tests/dialects/test_duckdb.py
@@ -18,36 +18,35 @@ class TestDuckDB(Validator):
"WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1",
)
- self.validate_all(
- "SELECT straight_join",
- write={
- "duckdb": "SELECT straight_join",
- "mysql": "SELECT `straight_join`",
- },
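+        # Nested STRUCT / array types should round-trip through DuckDB's type parser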
+ struct_array_type = exp.maybe_parse(
+ "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]",
+ into=exp.DataType,
+ dialect="duckdb",
+ )
+ self.assertEqual(
+ struct_array_type.sql("duckdb"),
+ "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]",
)
+
self.validate_all(
- "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
- read={
- "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
- "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
+ "CAST(x AS UUID)",
+ write={
+ "bigquery": "CAST(x AS STRING)",
+ "duckdb": "CAST(x AS UUID)",
},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""",
read={
- "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
- "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
+ "duckdb": """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""",
+ "snowflake": """SELECT TRY_PARSE_JSON('{"x: 1}')""",
},
)
self.validate_all(
- "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
- read={
- "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
- "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
- },
+ "SELECT straight_join",
write={
- "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
- "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
+ "duckdb": "SELECT straight_join",
+ "mysql": "SELECT `straight_join`",
},
)
self.validate_all(
@@ -103,7 +102,9 @@ class TestDuckDB(Validator):
self.validate_all(
"CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)",
- read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"},
+ read={
+ "bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)",
+ },
)
self.validate_identity("SELECT 1 WHERE x > $1")
self.validate_identity("SELECT 1 WHERE x > $name")
@@ -119,13 +120,17 @@ class TestDuckDB(Validator):
)
self.validate_all(
- "{'a': 1, 'b': '2'}", write={"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))"}
+ "{'a': 1, 'b': '2'}",
+ write={
+ "presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))",
+ },
)
self.validate_all(
"struct_pack(a := 1, b := 2)",
- write={"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))"},
+ write={
+ "presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
+ },
)
-
self.validate_all(
"struct_pack(a := 1, b := x)",
write={
@@ -243,10 +248,6 @@ class TestDuckDB(Validator):
},
)
- self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
- self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
- self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
-
# https://github.com/duckdb/duckdb/releases/tag/v0.8.0
self.assertEqual(
parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b"
@@ -255,6 +256,11 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
+ self.validate_identity("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))")
+ self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
+ self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
+ self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
+ self.validate_identity("SELECT * FROM GLOB(x)")
self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])")
self.validate_identity("SELECT MAP {'x': 1}")
self.validate_identity("SELECT (MAP {'x': 1})['x']")
@@ -286,10 +292,39 @@ class TestDuckDB(Validator):
self.validate_identity("x -> '$.family'")
self.validate_identity("CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))")
self.validate_identity("SELECT * FROM foo WHERE bar > $baz AND bla = $bob")
+ self.validate_identity("SUMMARIZE tbl").assert_is(exp.Summarize)
+ self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize)
+ self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)")
+ self.validate_identity("UNION_VALUE(k1 := 1)").find(exp.PropertyEQ).this.assert_is(
+ exp.Identifier
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY GROUPING SETS (species), GROUPING SETS (island)"
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY CUBE (species), CUBE (island)"
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY ROLLUP (species), ROLLUP (island)"
+ )
+ self.validate_identity(
+ "SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'"
+ ).assert_is(exp.Summarize)
self.validate_identity(
"SELECT * FROM x LEFT JOIN UNNEST(y)", "SELECT * FROM x LEFT JOIN UNNEST(y) ON TRUE"
)
self.validate_identity(
+ """SELECT '{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""",
+ )
+ self.validate_identity(
+ """SELECT JSON_EXTRACT_STRING('{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }', ['$.family', '$.species'])""",
+ """SELECT '{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""",
+ )
+ self.validate_identity(
+ "SELECT col FROM t WHERE JSON_EXTRACT_STRING(col, '$.id') NOT IN ('b')",
+ "SELECT col FROM t WHERE NOT (col ->> '$.id') IN ('b')",
+ )
+ self.validate_identity(
"SELECT a, LOGICAL_OR(b) FROM foo GROUP BY a",
"SELECT a, BOOL_OR(b) FROM foo GROUP BY a",
)
@@ -302,6 +337,14 @@ class TestDuckDB(Validator):
"SELECT (c -> '$.k1') = 'v1'",
)
self.validate_identity(
+ "SELECT JSON_EXTRACT(c, '$[*].id')[0:2]",
+ "SELECT (c -> '$[*].id')[0 : 2]",
+ )
+ self.validate_identity(
+ "SELECT JSON_EXTRACT_STRING(c, '$[*].id')[0:2]",
+ "SELECT (c ->> '$[*].id')[0 : 2]",
+ )
+ self.validate_identity(
"""SELECT '{"foo": [1, 2, 3]}' -> 'foo' -> 0""",
"""SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""",
)
@@ -354,6 +397,10 @@ class TestDuckDB(Validator):
self.validate_identity(
"SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias"
)
+ self.validate_identity(
+ # QUALIFY comes after WINDOW
+ "SELECT schema_name, function_name, ROW_NUMBER() OVER my_window AS function_rank FROM DUCKDB_FUNCTIONS() WINDOW my_window AS (PARTITION BY schema_name ORDER BY function_name) QUALIFY ROW_NUMBER() OVER my_window < 3"
+ )
self.validate_identity("DATE_SUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous)
self.validate_identity("DATESUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous)
@@ -488,8 +535,8 @@ class TestDuckDB(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
},
)
self.validate_all(
@@ -497,8 +544,8 @@ class TestDuckDB(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
},
)
self.validate_all(
@@ -677,11 +724,11 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
- "SELECT CAST('2020-05-06' AS DATE) - INTERVAL 5 DAY",
+ "SELECT CAST('2020-05-06' AS DATE) - INTERVAL '5' DAY",
read={"bigquery": "SELECT DATE_SUB(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_all(
- "SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY",
+ "SELECT CAST('2020-05-06' AS DATE) + INTERVAL '5' DAY",
read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_identity(
@@ -786,6 +833,32 @@ class TestDuckDB(Validator):
},
)
+ self.validate_identity("SELECT LENGTH(foo)")
+ self.validate_identity("SELECT ARRAY[1, 2, 3]", "SELECT [1, 2, 3]")
+
+ self.validate_identity("SELECT * FROM (DESCRIBE t)")
+
+ self.validate_identity("SELECT UNNEST([*COLUMNS('alias_.*')]) AS column_name")
+ self.validate_identity(
+ "SELECT COALESCE(*COLUMNS(*)) FROM (SELECT NULL, 2, 3) AS t(a, b, c)"
+ )
+ self.validate_identity(
+ "SELECT id, STRUCT_PACK(*COLUMNS('m\\d')) AS measurements FROM many_measurements",
+ """SELECT id, {'_0': *COLUMNS('m\\d')} AS measurements FROM many_measurements""",
+ )
+ self.validate_identity("SELECT COLUMNS(c -> c LIKE '%num%') FROM numbers")
+ self.validate_identity(
+ "SELECT MIN(COLUMNS(* REPLACE (number + id AS number))), COUNT(COLUMNS(* EXCLUDE (number))) FROM numbers"
+ )
+ self.validate_identity("SELECT COLUMNS(*) + COLUMNS(*) FROM numbers")
+ self.validate_identity("SELECT COLUMNS('(id|numbers?)') FROM numbers")
+ self.validate_identity(
+ "SELECT COALESCE(COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)"
+ )
+ self.validate_identity(
+ "SELECT COALESCE(*COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)"
+ )
+
def test_array_index(self):
with self.assertLogs(helper_logger) as cm:
self.validate_all(
@@ -818,10 +891,10 @@ class TestDuckDB(Validator):
self.assertEqual(
cm.output,
[
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (-1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
],
)
@@ -847,7 +920,7 @@ class TestDuckDB(Validator):
read={"bigquery": "SELECT DATE(PARSE_DATE('%m/%d/%Y', '05/06/2020'))"},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (-1) DAY",
+ "SELECT CAST('2020-01-01' AS DATE) + INTERVAL '-1' DAY",
read={"mysql": "SELECT DATE '2020-01-01' + INTERVAL -1 DAY"},
)
self.validate_all(
@@ -855,7 +928,7 @@ class TestDuckDB(Validator):
write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"},
)
self.validate_all(
- "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - ((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7) % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
+ "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - ((ISODOW(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7) % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
@@ -876,12 +949,12 @@ class TestDuckDB(Validator):
"EPOCH_MS(x)",
write={
"bigquery": "TIMESTAMP_MILLIS(x)",
+ "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
"duckdb": "EPOCH_MS(x)",
+ "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
+ "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
"spark": "TIMESTAMP_MILLIS(x)",
- "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Int64))",
- "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
- "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
},
)
self.validate_all(
@@ -925,7 +998,7 @@ class TestDuckDB(Validator):
self.validate_all(
"STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
write={
- "bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)",
+ "bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)",
"duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
"presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)",
@@ -984,12 +1057,20 @@ class TestDuckDB(Validator):
"duckdb": "SELECT * FROM example TABLESAMPLE RESERVOIR (3 ROWS) REPEATABLE (82)",
},
)
+ self.validate_all(
+ "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE (2 ROWS)",
+ write={
+ "duckdb": "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE RESERVOIR (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE RESERVOIR (2 ROWS)",
+ "spark": "SELECT * FROM (SELECT * FROM t) TABLESAMPLE (1 ROWS) AS t1, (SELECT * FROM t) TABLESAMPLE (2 ROWS) AS t2",
+ },
+ )
def test_array(self):
self.validate_identity("ARRAY(SELECT id FROM t)")
self.validate_identity("ARRAY((SELECT id FROM t))")
def test_cast(self):
+ self.validate_identity("x::int[3]", "CAST(x AS INT[3])")
self.validate_identity("CAST(x AS REAL)")
self.validate_identity("CAST(x AS UINTEGER)")
self.validate_identity("CAST(x AS UBIGINT)")
@@ -1029,14 +1110,53 @@ class TestDuckDB(Validator):
)
self.validate_identity(
"CAST([[STRUCT_PACK(a := 1)]] AS STRUCT(a BIGINT)[][])",
- "CAST([[{'a': 1}]] AS STRUCT(a BIGINT)[][])",
+ "CAST([[ROW(1)]] AS STRUCT(a BIGINT)[][])",
)
self.validate_identity(
"CAST([STRUCT_PACK(a := 1)] AS STRUCT(a BIGINT)[])",
- "CAST([{'a': 1}] AS STRUCT(a BIGINT)[])",
+ "CAST([ROW(1)] AS STRUCT(a BIGINT)[])",
+ )
+ self.validate_identity(
+ "STRUCT_PACK(a := 'b')::json",
+ "CAST({'a': 'b'} AS JSON)",
+ )
+ self.validate_identity(
+ "STRUCT_PACK(a := 'b')::STRUCT(a TEXT)",
+ "CAST(ROW('b') AS STRUCT(a TEXT))",
)
self.validate_all(
+ "CAST(x AS TIME)",
+ read={
+ "duckdb": "CAST(x AS TIME)",
+ "presto": "CAST(x AS TIME(6))",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
+ read={
+ "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
+ "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ read={
+ "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
+ read={
+ "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
+ },
+ write={
+ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
+ "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
+ },
+ )
+ self.validate_all(
"CAST(x AS VARCHAR(5))",
write={
"duckdb": "CAST(x AS TEXT)",
@@ -1116,6 +1236,12 @@ class TestDuckDB(Validator):
},
)
+ self.validate_identity("SELECT x::INT[3][3]", "SELECT CAST(x AS INT[3][3])")
+ self.validate_identity(
+ """SELECT ARRAY[[[1]]]::INT[1][1][1]""",
+ """SELECT CAST([[[1]]] AS INT[1][1][1])""",
+ )
+
def test_encode_decode(self):
self.validate_all(
"ENCODE(x)",
@@ -1194,3 +1320,20 @@ class TestDuckDB(Validator):
read={"bigquery": "SELECT @foo"},
write={"bigquery": "SELECT @foo", "duckdb": "SELECT $foo"},
)
+
+ def test_ignore_nulls(self):
+ # Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM)
+ from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS
+
+ agg_funcs = (exp.Sum, exp.Max, exp.Min)
+
+ for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs:
+ func = func_type(this=exp.to_identifier("col"))
+ ignore_null = exp.IgnoreNulls(this=func)
+ windowed_ignore_null = exp.Window(this=ignore_null)
+
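+            # IGNORE NULLS should only be emitted inside a window for the supported window functions;
+            # for plain aggregates the modifier is dropped from the generated DuckDB SQL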
+ if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS:
+ self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
+ else:
+ self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
+ self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index 0311336..136ea60 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -171,6 +171,16 @@ class TestHive(Validator):
self.validate_identity(
"""CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
)
+ self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2")
+ self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2")
+ self.validate_identity(
+ "ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1"
+ )
+ self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2")
+ self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')")
+ self.validate_identity(
+ "ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True
+ )
def test_lateral_view(self):
self.validate_all(
@@ -372,7 +382,7 @@ class TestHive(Validator):
"UNIX_TIMESTAMP(x)",
write={
"duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
- "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))",
"hive": "UNIX_TIMESTAMP(x)",
"spark": "UNIX_TIMESTAMP(x)",
"": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
@@ -402,6 +412,7 @@ class TestHive(Validator):
)
def test_hive(self):
+ self.validate_identity("SELECT * FROM t WHERE col IN ('stream')")
self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")
self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)")
@@ -705,8 +716,8 @@ class TestHive(Validator):
"presto": "ARRAY_AGG(x)",
},
write={
- "duckdb": "ARRAY_AGG(x)",
- "presto": "ARRAY_AGG(x)",
+ "duckdb": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
+ "presto": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
"hive": "COLLECT_LIST(x)",
"spark": "COLLECT_LIST(x)",
},
@@ -754,6 +765,24 @@ class TestHive(Validator):
"presto": "SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP)) AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
},
)
+ self.validate_all(
+ "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ read={
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ },
+ write={
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ },
+ )
def test_escapes(self) -> None:
self.validate_identity("'\n'", "'\\n'")
diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py
index 280ebbf..835ee7c 100644
--- a/tests/dialects/test_mysql.py
+++ b/tests/dialects/test_mysql.py
@@ -1,3 +1,6 @@
+import unittest
+import sys
+
from sqlglot import expressions as exp
from sqlglot.dialects.mysql import MySQL
from tests.dialects.test_dialect import Validator
@@ -24,6 +27,21 @@ class TestMySQL(Validator):
self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)")
self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)")
self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT")
+ self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
+ self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
+ self.validate_identity(
+ "ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True
+ )
+ self.validate_identity(
+ "INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b"
+ )
self.validate_identity(
"CREATE TABLE `oauth_consumer` (`key` VARCHAR(32) NOT NULL, UNIQUE `OAUTH_CONSUMER_KEY` (`key`))"
)
@@ -64,6 +82,10 @@ class TestMySQL(Validator):
"CREATE OR REPLACE VIEW my_view AS SELECT column1 AS `boo`, column2 AS `foo` FROM my_table WHERE column3 = 'some_value' UNION SELECT q.* FROM fruits_table, JSON_TABLE(Fruits, '$[*]' COLUMNS(id VARCHAR(255) PATH '$.$id', value VARCHAR(255) PATH '$.value')) AS q",
)
self.validate_identity(
+ "CREATE TABLE t (name VARCHAR)",
+ "CREATE TABLE t (name TEXT)",
+ )
+ self.validate_identity(
"ALTER TABLE t ADD KEY `i` (`c`)",
"ALTER TABLE t ADD INDEX `i` (`c`)",
)
@@ -117,6 +139,7 @@ class TestMySQL(Validator):
)
def test_identity(self):
+ self.validate_identity("SELECT CAST(COALESCE(`id`, 'NULL') AS CHAR CHARACTER SET binary)")
self.validate_identity("SELECT e.* FROM e STRAIGHT_JOIN p ON e.x = p.y")
self.validate_identity("ALTER TABLE test_table ALTER COLUMN test_column SET DEFAULT 1")
self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d %H:%i:00.0000')")
@@ -157,6 +180,10 @@ class TestMySQL(Validator):
"REPLACE INTO table SELECT id FROM table2 WHERE cnt > 100", check_command_warning=True
)
self.validate_identity(
+ "CAST(x AS VARCHAR)",
+ "CAST(x AS CHAR)",
+ )
+ self.validate_identity(
"""SELECT * FROM foo WHERE 3 MEMBER OF(info->'$.value')""",
"""SELECT * FROM foo WHERE 3 MEMBER OF(JSON_EXTRACT(info, '$.value'))""",
)
@@ -222,7 +249,7 @@ class TestMySQL(Validator):
"SET @@GLOBAL.sort_buffer_size = 1000000, @@LOCAL.sort_buffer_size = 1000000"
)
self.validate_identity("INTERVAL '1' YEAR")
- self.validate_identity("DATE_ADD(x, INTERVAL 1 YEAR)")
+ self.validate_identity("DATE_ADD(x, INTERVAL '1' YEAR)")
self.validate_identity("CHAR(0)")
self.validate_identity("CHAR(77, 121, 83, 81, '76')")
self.validate_identity("CHAR(77, 77.3, '77.3' USING utf8mb4)")
@@ -520,9 +547,16 @@ class TestMySQL(Validator):
},
)
self.validate_all(
- "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
+ "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')",
+ write={
+ "mysql": "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')",
+ "snowflake": "SELECT TO_CHAR(CAST('2024-08-22 14:53:12' AS TIMESTAMP), 'DY')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')",
write={
- "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
+ "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')",
"snowflake": "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMP), 'DY mmmm yyyy')",
},
)
@@ -536,7 +570,7 @@ class TestMySQL(Validator):
self.validate_all(
"SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')",
write={
- "mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %W %d %m %b')",
+ "mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')",
"snowflake": "SELECT TO_CHAR(CAST('1900-10-04 22:23:00' AS TIMESTAMP), 'DD yy DY DD mm mon')",
},
)
@@ -621,6 +655,53 @@ class TestMySQL(Validator):
},
)
+ # No timezone, make sure DATETIME captures the correct precision
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123456+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.123456+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.123+00:00' AS DATETIME(3))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15+00:00' AS DATETIME)",
+ )
+
+ # With timezone, make sure the TIMESTAMP constructor is used
+        # Also, TIMESTAMP doesn't have the subsecond precision truncation issue that DATETIME does, so we don't need TIMESTAMP(6)
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
+ write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
+ write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
+ )
+
+ @unittest.skipUnless(
+ sys.version_info >= (3, 11),
+        "Python 3.11 relaxed datetime.fromisoformat() parsing with regard to microseconds",
+ )
+ def test_mysql_time_python311(self):
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12345+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.12345+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1234+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.1234+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.12+00:00' AS DATETIME(3))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.1+00:00' AS DATETIME(3))",
+ )
+
def test_mysql(self):
self.validate_all(
"SELECT CONCAT('11', '22')",
@@ -1176,3 +1257,39 @@ COMMENT='客户账户表'"""
"mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})",
},
)
+
+ def test_at_time_zone(self):
+ with self.assertLogs() as cm:
+            # Check that AT TIME ZONE doesn't discard the column name and also raises a warning
+ self.validate_identity(
+ "SELECT foo AT TIME ZONE 'UTC'",
+ write_sql="SELECT foo",
+ )
+ assert "AT TIME ZONE is not supported" in cm.output[0]
+
+ def test_json_value(self):
+ json_doc = """'{"item": "shoes", "price": "49.95"}'"""
+ self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, '$.price')""")
+ self.validate_identity(
+ f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2))"""
+ )
+
+ for on_option in ("NULL", "ERROR", "DEFAULT 1"):
+ self.validate_identity(
+ f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price"""
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT 'role1', 'role2' TO 'user1'@'localhost', 'user2'@'localhost'",
+ "GRANT SELECT ON world.* TO 'role3'",
+ "GRANT SELECT ON db2.invoice TO 'jeffrey'@'localhost'",
+ "GRANT INSERT ON `d%`.* TO u",
+ "GRANT ALL ON test.* TO ''@'localhost'",
+ "GRANT SELECT (col1), INSERT (col1, col2) ON mydb.mytbl TO 'someuser'@'somehost'",
+ "GRANT SELECT, INSERT, UPDATE ON *.* TO u2",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing MySQL's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py
index 7cc4d72..8675086 100644
--- a/tests/dialects/test_oracle.py
+++ b/tests/dialects/test_oracle.py
@@ -1,5 +1,4 @@
from sqlglot import exp, UnsupportedError
-from sqlglot.dialects.oracle import eliminate_join_marks
from tests.dialects.test_dialect import Validator
@@ -7,15 +6,17 @@ class TestOracle(Validator):
dialect = "oracle"
def test_oracle(self):
+ self.validate_identity("1 /* /* */")
self.validate_all(
"SELECT CONNECT_BY_ROOT x y",
write={
- "": "SELECT CONNECT_BY_ROOT(x) AS y",
+ "": "SELECT CONNECT_BY_ROOT x AS y",
"oracle": "SELECT CONNECT_BY_ROOT x AS y",
},
)
- self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.AlterTable)
+ self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.Alter)
+ self.validate_identity("SYSDATE")
self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("REGEXP_REPLACE('source', 'search')")
@@ -45,6 +46,9 @@ class TestOracle(Validator):
self.validate_identity("SELECT * FROM V$SESSION")
self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')")
self.validate_identity(
+ "SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))"
+ )
+ self.validate_identity(
"SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name"
)
self.validate_identity(
@@ -66,6 +70,10 @@ class TestOracle(Validator):
"SELECT MIN(column_name) KEEP (DENSE_RANK FIRST ORDER BY column_name DESC) FROM table_name"
)
self.validate_identity(
+ "SELECT TRUNC(SYSDATE)",
+ "SELECT TRUNC(SYSDATE, 'DD')",
+ )
+ self.validate_identity(
"""SELECT JSON_OBJECT(KEY 'key1' IS emp.column1, KEY 'key2' IS emp.column1) "emp_key" FROM emp""",
"""SELECT JSON_OBJECT('key1': emp.column1, 'key2': emp.column1) AS "emp_key" FROM emp""",
)
@@ -74,10 +82,6 @@ class TestOracle(Validator):
"SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30",
)
self.validate_identity(
- "SYSDATE",
- "CURRENT_TIMESTAMP",
- )
- self.validate_identity(
"SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) "
'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) '
'OVER (PARTITION BY department_id) AS "Best" FROM employees ORDER BY department_id, salary, last_name'
@@ -87,8 +91,7 @@ class TestOracle(Validator):
"SELECT DISTINCT col1, col2 FROM table",
)
self.validate_identity(
- "SELECT * FROM T ORDER BY I OFFSET nvl(:variable1, 10) ROWS FETCH NEXT nvl(:variable2, 10) ROWS ONLY",
- "SELECT * FROM T ORDER BY I OFFSET COALESCE(:variable1, 10) ROWS FETCH NEXT COALESCE(:variable2, 10) ROWS ONLY",
+ "SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY",
)
self.validate_identity(
"SELECT * FROM t SAMPLE (.25)",
@@ -100,6 +103,23 @@ class TestOracle(Validator):
)
self.validate_all(
+ "TRUNC(SYSDATE, 'YEAR')",
+ write={
+ "clickhouse": "DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())",
+ "oracle": "TRUNC(SYSDATE, 'YEAR')",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM test WHERE MOD(col1, 4) = 3",
+ read={
+ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
+ },
+ write={
+ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
+ "oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3",
+ },
+ )
+ self.validate_all(
"CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
read={
"postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
@@ -191,13 +211,6 @@ class TestOracle(Validator):
},
)
self.validate_all(
- "NVL(NULL, 1)",
- write={
- "": "COALESCE(NULL, 1)",
- "oracle": "COALESCE(NULL, 1)",
- },
- )
- self.validate_all(
"DATE '2022-01-01'",
write={
"": "DATE_STR_TO_DATE('2022-01-01')",
@@ -245,6 +258,25 @@ class TestOracle(Validator):
"duckdb": "SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)",
},
)
+ self.validate_identity(
+ """SELECT * FROM t ORDER BY a ASC NULLS LAST, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC NULLS FIRST""",
+ """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""",
+ )
+ self.validate_all(
+ "NVL(NULL, 1)",
+ write={
+ "oracle": "NVL(NULL, 1)",
+ "": "COALESCE(NULL, 1)",
+ "clickhouse": "COALESCE(NULL, 1)",
+ },
+ )
+ self.validate_all(
+ "TRIM(BOTH 'h' FROM 'Hello World')",
+ write={
+ "oracle": "TRIM(BOTH 'h' FROM 'Hello World')",
+ "clickhouse": "TRIM(BOTH 'h' FROM 'Hello World')",
+ },
+ )
def test_join_marker(self):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")
@@ -332,7 +364,7 @@ FROM warehouses, XMLTABLE(
FROM XMLTABLE(
'ROWSET/ROW'
PASSING
- dbms_xmlgen.GETXMLTYPE('SELECT table_name, column_name, data_default FROM user_tab_columns')
+ dbms_xmlgen.getxmltype('SELECT table_name, column_name, data_default FROM user_tab_columns')
COLUMNS
table_name VARCHAR2(128) PATH '*[1]',
column_name VARCHAR2(128) PATH '*[2]',
@@ -416,59 +448,6 @@ WHERE
for query in (f"{body}{start}{connect}", f"{body}{connect}{start}"):
self.validate_identity(query, pretty, pretty=True)
- def test_eliminate_join_marks(self):
- test_sql = [
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) > 5",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y > 5",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) IS NULL",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y IS NULL",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y IS NULL",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T2.y IS NULL",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T1.Z > 4",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T1.Z > 4",
- ),
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column",
- ),
- (
- "SELECT * FROM table1, table2, table3, table4 WHERE table1.column = table2.column(+) and table2.column >= table3.column(+) and table1.column = table4.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column LEFT JOIN table3 ON table2.column >= table3.column LEFT JOIN table4 ON table1.column = table4.column",
- ),
- (
- "SELECT * FROM table1, table2, table3 WHERE table1.column = table2.column(+) and table2.column >= table3.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column LEFT JOIN table3 ON table2.column >= table3.column",
- ),
- (
- "SELECT table1.id, table2.cloumn1, table3.id FROM table1, table2, (SELECT tableInner1.id FROM tableInner1, tableInner2 WHERE tableInner1.id = tableInner2.id(+)) AS table3 WHERE table1.id = table2.id(+) and table1.id = table3.id(+)",
- "SELECT table1.id, table2.cloumn1, table3.id FROM table1 LEFT JOIN table2 ON table1.id = table2.id LEFT JOIN (SELECT tableInner1.id FROM tableInner1 LEFT JOIN tableInner2 ON tableInner1.id = tableInner2.id) table3 ON table1.id = table3.id",
- ),
- # 2 join marks on one side of predicate
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column1(+) + table2.column2(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column1 + table2.column2",
- ),
- # join mark and expression
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column1(+) + 25",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column1 + 25",
- ),
- ]
-
- for original, expected in test_sql:
- with self.subTest(original):
- self.assertEqual(
- eliminate_join_marks(self.parse_one(original)).sql(dialect=self.dialect),
- expected,
- )
-
def test_query_restrictions(self):
for restriction in ("READ ONLY", "CHECK OPTION"):
for constraint_name in (" CONSTRAINT name", ""):
@@ -477,3 +456,111 @@ WHERE
self.validate_identity(
f"CREATE VIEW view AS SELECT * FROM tbl WITH {restriction}{constraint_name}"
)
+
+ def test_multitable_inserts(self):
+ self.maxDiff = None
+ self.validate_identity(
+ "INSERT ALL "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'mon', mon_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'tue', tue_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'wed', wed_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'thu', thu_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'fri', fri_val) "
+ "SELECT * "
+ "FROM pivot_source"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id BETWEEN 4 AND 7 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "WHEN id >= 8 THEN "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id BETWEEN 4 AND 7 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "WHEN 1 = 1 THEN "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT FIRST "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id <= 5 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "ELSE "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT FIRST "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "ELSE "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "/* COMMENT */ INSERT FIRST "
+ "WHEN salary > 4000 THEN INTO emp2 "
+ "WHEN salary > 5000 THEN INTO emp3 "
+ "WHEN salary > 6000 THEN INTO emp4 "
+ "SELECT salary FROM employees"
+ )
+
+ def test_json_functions(self):
+ for format_json in ("", " FORMAT JSON"):
+ for on_cond in (
+ "",
+ " TRUE ON ERROR",
+ " NULL ON EMPTY",
+ " DEFAULT 1 ON ERROR TRUE ON EMPTY",
+ ):
+ for passing in ("", " PASSING 'name1' AS \"var1\", 'name2' AS \"var2\""):
+ with self.subTest("Testing JSON_EXISTS()"):
+ self.validate_identity(
+ f"SELECT * FROM t WHERE JSON_EXISTS(name{format_json}, '$[1].middle'{passing}{on_cond})"
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT purchases_reader_role TO george, maria",
+ "GRANT USAGE ON TYPE price TO finance_role",
+ "GRANT USAGE ON DERBY AGGREGATE types.maxPrice TO sales_role",
+ ]
+
+ for sql in grant_cmds:
+            with self.subTest(f"Testing Oracle's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity("GRANT SELECT ON TABLE t TO maria, harry")
+ self.validate_identity("GRANT SELECT ON TABLE s.v TO PUBLIC")
+ self.validate_identity("GRANT SELECT ON TABLE t TO purchases_reader_role")
+ self.validate_identity("GRANT UPDATE, TRIGGER ON TABLE t TO anita, zhi")
+ self.validate_identity("GRANT EXECUTE ON PROCEDURE p TO george")
+ self.validate_identity("GRANT USAGE ON SEQUENCE order_id TO sales_role")
diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py
index 071677d..63266a5 100644
--- a/tests/dialects/test_postgres.py
+++ b/tests/dialects/test_postgres.py
@@ -8,10 +8,15 @@ class TestPostgres(Validator):
dialect = "postgres"
def test_postgres(self):
- self.validate_identity("SHA384(x)")
- self.validate_identity(
- 'CREATE TABLE x (a TEXT COLLATE "de_DE")', "CREATE TABLE x (a TEXT COLLATE de_DE)"
+ self.validate_all(
+ "x ? y",
+ write={
+ "": "JSONB_CONTAINS(x, y)",
+ "postgres": "x ? y",
+ },
)
+
+ self.validate_identity("SHA384(x)")
self.validate_identity("1.x", "1. AS x")
self.validate_identity("|/ x", "SQRT(x)")
self.validate_identity("||/ x", "CBRT(x)")
@@ -23,7 +28,7 @@ class TestPostgres(Validator):
alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION"""
expr = self.parse_one(alter_table_only)
- self.assertIsInstance(expr, exp.AlterTable)
+ self.assertIsInstance(expr, exp.Alter)
self.assertEqual(expr.sql(dialect="postgres"), alter_table_only)
self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')")
@@ -68,10 +73,6 @@ class TestPostgres(Validator):
self.validate_identity("SELECT CURRENT_USER")
self.validate_identity("SELECT * FROM ONLY t1")
self.validate_identity(
- "SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]",
- "SELECT ARRAY[1, 2] @> ARRAY[1, 2, 3]",
- )
- self.validate_identity(
"""UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)"""
)
self.validate_identity(
@@ -128,6 +129,14 @@ class TestPostgres(Validator):
"ORDER BY 2, 3"
)
self.validate_identity(
+ "/*+ some comment*/ SELECT b.foo, b.bar FROM baz AS b",
+ "/* + some comment */ SELECT b.foo, b.bar FROM baz AS b",
+ )
+ self.validate_identity(
+ "SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]",
+ "SELECT ARRAY[1, 2] @> ARRAY[1, 2, 3]",
+ )
+ self.validate_identity(
"SELECT ARRAY[]::INT[] AS foo",
"SELECT CAST(ARRAY[] AS INT[]) AS foo",
)
@@ -540,37 +549,53 @@ class TestPostgres(Validator):
},
)
self.validate_all(
- "GENERATE_SERIES(a, b, ' 2 days ')",
+ "SELECT GENERATE_SERIES(1, 5)",
write={
- "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
- "presto": "SEQUENCE(a, b, INTERVAL '2' DAY)",
- "trino": "SEQUENCE(a, b, INTERVAL '2' DAY)",
+ "bigquery": UnsupportedError,
+ "postgres": "SELECT GENERATE_SERIES(1, 5)",
},
)
self.validate_all(
- "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
+ "WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
write={
- "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
- "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
- "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
+ "duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
+ "postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
+ },
+ )
+ self.validate_all(
+ "GENERATE_SERIES(a, b, ' 2 days ')",
+ write={
+ "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
+ "presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
+ "trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
},
)
self.validate_all(
- "GENERATE_SERIES(a, b)",
+ "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
write={
- "postgres": "GENERATE_SERIES(a, b)",
- "presto": "SEQUENCE(a, b)",
- "trino": "SEQUENCE(a, b)",
- "tsql": "GENERATE_SERIES(a, b)",
+ "databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
+ "presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
+ "spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
},
)
self.validate_all(
- "GENERATE_SERIES(a, b)",
+ "SELECT * FROM GENERATE_SERIES(a, b)",
read={
- "postgres": "GENERATE_SERIES(a, b)",
- "presto": "SEQUENCE(a, b)",
- "trino": "SEQUENCE(a, b)",
- "tsql": "GENERATE_SERIES(a, b)",
+ "tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
+ },
+ write={
+ "databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "postgres": "SELECT * FROM GENERATE_SERIES(a, b)",
+ "presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
+ "spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
+ "tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
},
)
self.validate_all(
@@ -651,17 +676,25 @@ class TestPostgres(Validator):
},
)
self.validate_all(
- """'{"a":1,"b":2}'::json->'b'""",
+ "TRIM(BOTH 'as' FROM 'as string as')",
write={
- "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
- "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
+ "postgres": "TRIM(BOTH 'as' FROM 'as string as')",
+ "spark": "TRIM(BOTH 'as' FROM 'as string as')",
},
)
+ self.validate_identity(
+ """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")""",
+ """SELECT LTRIM(' XXX ' COLLATE "de_DE")""",
+ )
+ self.validate_identity(
+ """SELECT TRIM(TRAILING ' XXX ' COLLATE "de_DE")""",
+ """SELECT RTRIM(' XXX ' COLLATE "de_DE")""",
+ )
self.validate_all(
- "TRIM(BOTH 'as' FROM 'as string as')",
+ """'{"a":1,"b":2}'::json->'b'""",
write={
- "postgres": "TRIM(BOTH 'as' FROM 'as string as')",
- "spark": "TRIM(BOTH 'as' FROM 'as string as')",
+ "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
+ "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
},
)
self.validate_all(
@@ -747,6 +780,29 @@ class TestPostgres(Validator):
},
)
+ self.validate_all(
+ "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')",
+ write={
+ "duckdb": "SELECT CAST(STRPTIME('01/01/2000', '%m/%d/%Y') AS DATE)",
+ "postgres": "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')",
+ },
+ )
+
+ self.validate_identity(
+ 'SELECT js, js IS JSON AS "json?", js IS JSON VALUE AS "scalar?", js IS JSON SCALAR AS "scalar?", js IS JSON OBJECT AS "object?", js IS JSON ARRAY AS "array?" FROM t'
+ )
+ self.validate_identity(
+ 'SELECT js, js IS JSON ARRAY WITH UNIQUE KEYS AS "array w. UK?", js IS JSON ARRAY WITHOUT UNIQUE KEYS AS "array w/o UK?", js IS JSON ARRAY UNIQUE KEYS AS "array w UK 2?" FROM t'
+ )
+ self.validate_identity(
+ "MERGE INTO target_table USING source_table AS source ON target.id = source.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN DO NOTHING RETURNING MERGE_ACTION(), *"
+ )
+ self.validate_identity(
+ "SELECT 1 FROM ((VALUES (1)) AS vals(id) LEFT OUTER JOIN tbl ON vals.id = tbl.id)"
+ )
+ self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1)")
+ self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1 FOR 1)")
+
def test_ddl(self):
# Checks that user-defined types are parsed into DataType instead of Identifier
self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(
@@ -763,6 +819,9 @@ class TestPostgres(Validator):
cdef.args["kind"].assert_is(exp.DataType)
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)")
+ self.validate_identity('ALTER INDEX "IX_Ratings_Column1" RENAME TO "IX_Ratings_Column2"')
+ self.validate_identity('CREATE TABLE x (a TEXT COLLATE "de_DE")')
+ self.validate_identity('CREATE TABLE x (a TEXT COLLATE pg_catalog."default")')
self.validate_identity("CREATE TABLE t (col INT[3][5])")
self.validate_identity("CREATE TABLE t (col INT[3])")
self.validate_identity("CREATE INDEX IF NOT EXISTS ON t(c)")
@@ -796,6 +855,9 @@ class TestPostgres(Validator):
self.validate_identity("ALTER TABLE t1 SET TABLESPACE tablespace")
self.validate_identity("ALTER TABLE t1 SET (fillfactor = 5, autovacuum_enabled = TRUE)")
self.validate_identity(
+ "ALTER TABLE tested_table ADD CONSTRAINT unique_example UNIQUE (column_name) NOT VALID"
+ )
+ self.validate_identity(
"CREATE FUNCTION pymax(a INT, b INT) RETURNS INT LANGUAGE plpython3u AS $$\n if a > b:\n return a\n return b\n$$",
)
self.validate_identity(
@@ -965,6 +1027,42 @@ class TestPostgres(Validator):
},
)
+ self.validate_identity("CREATE TABLE tbl (col INT UNIQUE NULLS NOT DISTINCT DEFAULT 9.99)")
+ self.validate_identity("CREATE TABLE tbl (col UUID UNIQUE DEFAULT GEN_RANDOM_UUID())")
+ self.validate_identity("CREATE TABLE tbl (col UUID, UNIQUE NULLS NOT DISTINCT (col))")
+
+ self.validate_identity("CREATE INDEX CONCURRENTLY ix_table_id ON tbl USING btree(id)")
+ self.validate_identity(
+ "CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_table_id ON tbl USING btree(id)"
+ )
+ self.validate_identity("DROP INDEX ix_table_id")
+ self.validate_identity("DROP INDEX IF EXISTS ix_table_id")
+ self.validate_identity("DROP INDEX CONCURRENTLY ix_table_id")
+ self.validate_identity("DROP INDEX CONCURRENTLY IF EXISTS ix_table_id")
+
+ self.validate_identity(
+ """
+ CREATE TABLE IF NOT EXISTS public.rental
+ (
+ inventory_id INT NOT NULL,
+ CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id)
+ REFERENCES public.customer (customer_id) MATCH FULL
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id)
+ REFERENCES public.inventory (inventory_id) MATCH PARTIAL
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id)
+ REFERENCES public.staff (staff_id) MATCH SIMPLE
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ INITIALLY IMMEDIATE
+ )
+ """,
+ "CREATE TABLE IF NOT EXISTS public.rental (inventory_id INT NOT NULL, CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id) REFERENCES public.customer (customer_id) MATCH FULL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id) REFERENCES public.inventory (inventory_id) MATCH PARTIAL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id) REFERENCES public.staff (staff_id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE RESTRICT, INITIALLY IMMEDIATE)",
+ )
+
with self.assertRaises(ParseError):
transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres")
with self.assertRaises(ParseError):
@@ -1011,10 +1109,10 @@ class TestPostgres(Validator):
self.assertEqual(
cm.output,
[
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (-1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
],
)
@@ -1118,3 +1216,12 @@ CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(boxcrate AS JSON)) AS x(tbox)
CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(JSON_EXTRACT_PATH(tbox, 'boxes') AS JSON)) AS y(boxes)"""
self.validate_all(expected_postgres, read={"trino": trino_input}, pretty=True)
+
+ def test_rows_from(self):
+ self.validate_identity("""SELECT * FROM ROWS FROM (FUNC1(col1, col2))""")
+ self.validate_identity(
+ """SELECT * FROM ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT), FUNC2(col2) AS alias2("col2" INT)) WITH ORDINALITY"""
+ )
+ self.validate_identity(
+ """SELECT * FROM table1, ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT)) WITH ORDINALITY AS alias3("col3" INT, "col4" TEXT)"""
+ )
diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py
index ebb270a..3d5fbfe 100644
--- a/tests/dialects/test_presto.py
+++ b/tests/dialects/test_presto.py
@@ -14,6 +14,13 @@ class TestPresto(Validator):
self.validate_identity("CAST(x AS HYPERLOGLOG)")
self.validate_all(
+ "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
+ write={
+ "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)",
+ "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
+ },
+ )
+ self.validate_all(
"CAST(x AS INTERVAL YEAR TO MONTH)",
write={
"oracle": "CAST(x AS INTERVAL YEAR TO MONTH)",
@@ -151,8 +158,8 @@ class TestPresto(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a.')",
"presto": "SPLIT(x, 'a.')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
},
)
self.validate_all(
@@ -269,10 +276,19 @@ class TestPresto(Validator):
self.validate_all(
"DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')",
write={
- "duckdb": "STRPTIME(SUBSTR(x, 1, 10), '%Y-%m-%d')",
- "presto": "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')",
- "hive": "CAST(SUBSTR(x, 1, 10) AS TIMESTAMP)",
- "spark": "TO_TIMESTAMP(SUBSTR(x, 1, 10), 'yyyy-MM-dd')",
+ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
+ "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
+ },
+ )
+ self.validate_all(
+ "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ write={
+ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
+ "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
},
)
self.validate_all(
@@ -322,11 +338,20 @@ class TestPresto(Validator):
},
)
self.validate_all(
- "DAY_OF_WEEK(timestamp '2012-08-08 01:00:00')",
- write={
+ "((DAY_OF_WEEK(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP) AS DATE)) % 7) + 1)",
+ read={
"spark": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ },
+ )
+ self.validate_all(
+ "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ read={
+ "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ },
+ write={
+ "spark": "((DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP)) % 7) + 1)",
"presto": "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
- "duckdb": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
},
)
@@ -405,6 +430,27 @@ class TestPresto(Validator):
)
self.validate_identity("DATE_ADD('DAY', 1, y)")
+ self.validate_all(
+ "SELECT DATE_ADD('MINUTE', 30, col)",
+ write={
+ "presto": "SELECT DATE_ADD('MINUTE', 30, col)",
+ "trino": "SELECT DATE_ADD('MINUTE', 30, col)",
+ },
+ )
+
+ self.validate_identity("DATE_ADD('DAY', FLOOR(5), y)")
+ self.validate_identity(
+ """SELECT DATE_ADD('DAY', MOD(5, 2.5), y), DATE_ADD('DAY', CEIL(5.5), y)""",
+ """SELECT DATE_ADD('DAY', CAST(5 % 2.5 AS BIGINT), y), DATE_ADD('DAY', CAST(CEIL(5.5) AS BIGINT), y)""",
+ )
+
+ self.validate_all(
+ "DATE_ADD('MINUTE', CAST(FLOOR(CAST(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP) AS DOUBLE) / NULLIF(30, 0)) * 30 AS BIGINT), col)",
+ read={
+ "spark": "TIMESTAMPADD(MINUTE, FLOOR(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP)/30)*30, col)",
+ },
+ )
+
def test_ddl(self):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
@@ -494,6 +540,9 @@ class TestPresto(Validator):
},
)
+ self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY DEFINER AS SELECT id FROM t""")
+ self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY INVOKER AS SELECT id FROM t""")
+
def test_quotes(self):
self.validate_all(
"''''",
@@ -581,6 +630,13 @@ class TestPresto(Validator):
)
def test_presto(self):
+ self.assertEqual(
+ exp.func("md5", exp.func("concat", exp.cast("x", "text"), exp.Literal.string("s"))).sql(
+ dialect="presto"
+ ),
+ "LOWER(TO_HEX(MD5(TO_UTF8(CONCAT(CAST(x AS VARCHAR), CAST('s' AS VARCHAR))))))",
+ )
+
with self.assertLogs(helper_logger):
self.validate_all(
"SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table",
@@ -599,6 +655,7 @@ class TestPresto(Validator):
},
)
+ self.validate_identity("SELECT a FROM t GROUP BY a, ROLLUP (b), ROLLUP (c), ROLLUP (d)")
self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)")
self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)")
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
@@ -680,9 +737,6 @@ class TestPresto(Validator):
)
self.validate_all(
"SELECT ROW(1, 2)",
- read={
- "spark": "SELECT STRUCT(1, 2)",
- },
write={
"presto": "SELECT ROW(1, 2)",
"spark": "SELECT STRUCT(1, 2)",
@@ -801,12 +855,6 @@ class TestPresto(Validator):
},
)
self.validate_all(
- "SELECT a FROM t GROUP BY a, ROLLUP(b), ROLLUP(c), ROLLUP(d)",
- write={
- "presto": "SELECT a FROM t GROUP BY a, ROLLUP (b, c, d)",
- },
- )
- self.validate_all(
'SELECT a."b" FROM "foo"',
write={
"duckdb": 'SELECT a."b" FROM "foo"',
@@ -927,8 +975,8 @@ class TestPresto(Validator):
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14'])",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x",
- "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x",
+ "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
},
)
self.validate_all(
@@ -936,8 +984,8 @@ class TestPresto(Validator):
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14']) AS y",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)",
- "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)",
+ "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
},
)
self.validate_all(
@@ -995,6 +1043,25 @@ class TestPresto(Validator):
"spark": "SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[[1, 2, 3]]', SCHEMA_OF_JSON('[[1, 2, 3]]'))), '^.(.*).$', 1)",
},
)
+ self.validate_all(
+ "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ read={
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
+ },
+ write={
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ },
+ )
def test_encode_decode(self):
self.validate_identity("FROM_UTF8(x, y)")
@@ -1192,3 +1259,18 @@ MATCH_RECOGNIZE (
"starrocks": "SIGN(x)",
},
)
+
+ def test_json_vs_row_extract(self):
+ for dialect in ("trino", "presto"):
+ s = parse_one('SELECT col:x:y."special string"', read="snowflake")
+
+ dialect_json_extract_setting = f"{dialect}, variant_extract_is_json_extract=True"
+ dialect_row_access_setting = f"{dialect}, variant_extract_is_json_extract=False"
+
+ # By default, Snowflake VARIANT will generate JSON_EXTRACT() in Presto/Trino
+ json_extract_result = """SELECT JSON_EXTRACT(col, '$.x.y["special string"]')"""
+ self.assertEqual(s.sql(dialect), json_extract_result)
+ self.assertEqual(s.sql(dialect_json_extract_setting), json_extract_result)
+
+            # If the setting is overridden to False, then generate ROW access (dot notation)
+ self.assertEqual(s.sql(dialect_row_access_setting), 'SELECT col.x.y."special string"')
diff --git a/tests/dialects/test_prql.py b/tests/dialects/test_prql.py
index 5b438f1..2e35bb2 100644
--- a/tests/dialects/test_prql.py
+++ b/tests/dialects/test_prql.py
@@ -5,77 +5,165 @@ class TestPRQL(Validator):
dialect = "prql"
def test_prql(self):
- self.validate_identity("from x", "SELECT * FROM x")
- self.validate_identity("from x derive a + 1", "SELECT *, a + 1 FROM x")
- self.validate_identity("from x derive x = a + 1", "SELECT *, a + 1 AS x FROM x")
- self.validate_identity("from x derive {a + 1}", "SELECT *, a + 1 FROM x")
- self.validate_identity("from x derive {x = a + 1, b}", "SELECT *, a + 1 AS x, b FROM x")
- self.validate_identity(
- "from x derive {x = a + 1, b} select {y = x, 2}", "SELECT a + 1 AS y, 2 FROM x"
- )
- self.validate_identity("from x take 10", "SELECT * FROM x LIMIT 10")
- self.validate_identity("from x take 10 take 5", "SELECT * FROM x LIMIT 5")
- self.validate_identity("from x filter age > 25", "SELECT * FROM x WHERE age > 25")
- self.validate_identity(
+ self.validate_all(
+ "from x",
+ write={
+ "": "SELECT * FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive a + 1",
+ write={
+ "": "SELECT *, a + 1 FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive x = a + 1",
+ write={
+ "": "SELECT *, a + 1 AS x FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {a + 1}",
+ write={
+ "": "SELECT *, a + 1 FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {x = a + 1, b}",
+ write={
+ "": "SELECT *, a + 1 AS x, b FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {x = a + 1, b} select {y = x, 2}",
+ write={"": "SELECT a + 1 AS y, 2 FROM x"},
+ )
+ self.validate_all(
+ "from x take 10",
+ write={
+ "": "SELECT * FROM x LIMIT 10",
+ },
+ )
+ self.validate_all(
+ "from x take 10 take 5",
+ write={
+ "": "SELECT * FROM x LIMIT 5",
+ },
+ )
+ self.validate_all(
+ "from x filter age > 25",
+ write={
+ "": "SELECT * FROM x WHERE age > 25",
+ },
+ )
+ self.validate_all(
"from x derive {x = a + 1, b} filter age > 25",
- "SELECT *, a + 1 AS x, b FROM x WHERE age > 25",
+ write={
+ "": "SELECT *, a + 1 AS x, b FROM x WHERE age > 25",
+ },
)
- self.validate_identity("from x filter dept != 'IT'", "SELECT * FROM x WHERE dept <> 'IT'")
- self.validate_identity(
- "from x filter p == 'product' select { a, b }", "SELECT a, b FROM x WHERE p = 'product'"
+ self.validate_all(
+ "from x filter dept != 'IT'",
+ write={
+ "": "SELECT * FROM x WHERE dept <> 'IT'",
+ },
)
- self.validate_identity(
- "from x filter age > 25 filter age < 27", "SELECT * FROM x WHERE age > 25 AND age < 27"
+ self.validate_all(
+ "from x filter p == 'product' select { a, b }",
+ write={"": "SELECT a, b FROM x WHERE p = 'product'"},
)
- self.validate_identity(
- "from x filter (age > 25 && age < 27)", "SELECT * FROM x WHERE (age > 25 AND age < 27)"
+ self.validate_all(
+ "from x filter age > 25 filter age < 27",
+ write={"": "SELECT * FROM x WHERE age > 25 AND age < 27"},
)
- self.validate_identity(
- "from x filter (age > 25 || age < 27)", "SELECT * FROM x WHERE (age > 25 OR age < 27)"
+ self.validate_all(
+ "from x filter (age > 25 && age < 27)",
+ write={"": "SELECT * FROM x WHERE (age > 25 AND age < 27)"},
)
- self.validate_identity(
+ self.validate_all(
+ "from x filter (age > 25 || age < 27)",
+ write={"": "SELECT * FROM x WHERE (age > 25 OR age < 27)"},
+ )
+ self.validate_all(
"from x filter (age > 25 || age < 22) filter age > 26 filter age < 27",
- "SELECT * FROM x WHERE ((age > 25 OR age < 22) AND age > 26) AND age < 27",
+ write={
+ "": "SELECT * FROM x WHERE ((age > 25 OR age < 22) AND age > 26) AND age < 27",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort age",
- "SELECT * FROM x ORDER BY age",
+ write={
+ "": "SELECT * FROM x ORDER BY age",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {-age}",
- "SELECT * FROM x ORDER BY age DESC",
+ write={
+ "": "SELECT * FROM x ORDER BY age DESC",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {age, name}",
- "SELECT * FROM x ORDER BY age, name",
+ write={
+ "": "SELECT * FROM x ORDER BY age, name",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {-age, +name}",
- "SELECT * FROM x ORDER BY age DESC, name",
- )
- self.validate_identity("from x append y", "SELECT * FROM x UNION ALL SELECT * FROM y")
- self.validate_identity("from x remove y", "SELECT * FROM x EXCEPT ALL SELECT * FROM y")
- self.validate_identity(
- "from x intersect y", "SELECT * FROM x INTERSECT ALL SELECT * FROM y"
- )
- self.validate_identity(
+ write={
+ "": "SELECT * FROM x ORDER BY age DESC, name",
+ },
+ )
+ self.validate_all(
+ "from x append y",
+ write={
+ "": "SELECT * FROM x UNION ALL SELECT * FROM y",
+ },
+ )
+ self.validate_all(
+ "from x remove y",
+ write={
+ "": "SELECT * FROM x EXCEPT ALL SELECT * FROM y",
+ },
+ )
+ self.validate_all(
+ "from x intersect y",
+ write={"": "SELECT * FROM x INTERSECT ALL SELECT * FROM y"},
+ )
+ self.validate_all(
"from x filter a == null filter null != b",
- "SELECT * FROM x WHERE a IS NULL AND NOT b IS NULL",
+ write={
+ "": "SELECT * FROM x WHERE a IS NULL AND NOT b IS NULL",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x filter (a > 1 || null != b || c != null)",
- "SELECT * FROM x WHERE (a > 1 OR NOT b IS NULL OR NOT c IS NULL)",
+ write={
+ "": "SELECT * FROM x WHERE (a > 1 OR NOT b IS NULL OR NOT c IS NULL)",
+ },
+ )
+ self.validate_all(
+ "from a aggregate { average x }",
+ write={
+ "": "SELECT AVG(x) FROM a",
+ },
)
- self.validate_identity("from a aggregate { average x }", "SELECT AVG(x) FROM a")
- self.validate_identity(
+ self.validate_all(
"from a aggregate { average x, min y, ct = sum z }",
- "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) AS ct FROM a",
+ write={
+ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) AS ct FROM a",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from a aggregate { average x, min y, sum z }",
- "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) FROM a",
+ write={
+ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) FROM a",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from a aggregate { min y, b = stddev x, max z }",
- "SELECT MIN(y), STDDEV(x) AS b, MAX(z) FROM a",
+ write={
+ "": "SELECT MIN(y), STDDEV(x) AS b, MAX(z) FROM a",
+ },
)
diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py
index 69793c7..6f561da 100644
--- a/tests/dialects/test_redshift.py
+++ b/tests/dialects/test_redshift.py
@@ -6,7 +6,6 @@ class TestRedshift(Validator):
dialect = "redshift"
def test_redshift(self):
- self.validate_identity("1 div", "1 AS div")
self.validate_all(
"SELECT SPLIT_TO_ARRAY('12,345,6789')",
write={
@@ -28,7 +27,7 @@ class TestRedshift(Validator):
"""SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""",
write={
"bigquery": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
- "databricks": """SELECT GET_JSON_OBJECT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
+ "databricks": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}':farm.barn.color""",
"duckdb": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}' ->> '$.farm.barn.color'""",
"postgres": """SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""",
"presto": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
@@ -228,7 +227,7 @@ class TestRedshift(Validator):
"drill": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"hive": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"mysql": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
- "oracle": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) _t WHERE _row_number = 1",
+ "oracle": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) _t WHERE _row_number = 1",
"presto": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"redshift": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"snowflake": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
@@ -259,6 +258,12 @@ class TestRedshift(Validator):
"postgres": "COALESCE(a, b, c, d)",
},
)
+
+ self.validate_identity(
+ "DATEDIFF(days, a, b)",
+ "DATEDIFF(DAY, a, b)",
+ )
+
self.validate_all(
"DATEDIFF('day', a, b)",
write={
@@ -300,7 +305,16 @@ class TestRedshift(Validator):
},
)
+ self.validate_all(
+ "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)",
+ write={
+ "snowflake": "SELECT DATE_PART(EPOCH, CURRENT_DATE)",
+ "redshift": "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)",
+ },
+ )
+
def test_identity(self):
+ self.validate_identity("1 div", "1 AS div")
self.validate_identity("LISTAGG(DISTINCT foo, ', ')")
self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1")
self.validate_identity("SELECT DATEADD(DAY, 1, 'today')")
@@ -324,6 +338,10 @@ class TestRedshift(Validator):
"""SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)"""
)
self.validate_identity(
+ 'DATE_PART(year, "somecol")',
+ 'EXTRACT(year FROM "somecol")',
+ ).this.assert_is(exp.Var)
+ self.validate_identity(
"SELECT CONCAT('abc', 'def')",
"SELECT 'abc' || 'def'",
)
@@ -416,6 +434,14 @@ ORDER BY
)
self.validate_identity("SELECT JSON_PARSE('[]')")
+ self.validate_identity("SELECT ARRAY(1, 2, 3)")
+ self.validate_identity("SELECT ARRAY[1, 2, 3]")
+
+ self.validate_identity(
+ """SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""",
+ """SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""",
+ )
+
def test_values(self):
# Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError
values = [str(v) for v in range(0, 10000)]
@@ -594,3 +620,42 @@ FROM (
"select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)",
"SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)",
)
+
+ def test_time(self):
+ self.validate_all(
+ "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
+ write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"},
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT SELECT ON ALL TABLES IN SCHEMA qa_tickit TO fred",
+ "GRANT USAGE ON DATASHARE salesshare TO NAMESPACE '13b8833d-17c6-4f16-8fe4-1a018f5ed00d'",
+ "GRANT USAGE FOR SCHEMAS IN DATABASE Sales_db TO ROLE Sales",
+ "GRANT EXECUTE FOR FUNCTIONS IN SCHEMA Sales_schema TO bob",
+ "GRANT SELECT FOR TABLES IN DATABASE Sales_db TO alice WITH GRANT OPTION",
+ "GRANT ALL FOR TABLES IN SCHEMA ShareSchema DATABASE ShareDb TO ROLE Sales",
+ "GRANT ASSUMEROLE ON 'arn:aws:iam::123456789012:role/Redshift-Exfunc' TO reg_user1 FOR EXTERNAL FUNCTION",
+ "GRANT ROLE sample_role1 TO ROLE sample_role2",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing Redshift's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity("GRANT SELECT ON TABLE sales TO fred")
+ self.validate_identity("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users")
+ self.validate_identity("GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users")
+ self.validate_identity(
+ "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users"
+ )
+ self.validate_identity("GRANT ALL ON view_date TO view_user")
+ self.validate_identity(
+ "GRANT SELECT(cust_name, cust_phone), UPDATE(cust_contact_preference) ON cust_profile TO GROUP sales_group"
+ )
+ self.validate_identity(
+ "GRANT ALL(cust_name, cust_phone, cust_contact_preference) ON cust_profile TO GROUP sales_admin"
+ )
+ self.validate_identity("GRANT USAGE ON DATABASE sales_db TO Bob")
+ self.validate_identity("GRANT USAGE ON SCHEMA sales_schema TO ROLE Analyst_role")
+ self.validate_identity("GRANT SELECT ON sales_db.sales_schema.tickit_sales_redshift TO Bob")
diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py
index 1286436..6cde86b 100644
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -11,17 +11,10 @@ class TestSnowflake(Validator):
dialect = "snowflake"
def test_snowflake(self):
- self.validate_identity(
- "transform(x, a int -> a + a + 1)",
- "TRANSFORM(x, a -> CAST(a AS INT) + CAST(a AS INT) + 1)",
- )
-
- self.validate_all(
- "ARRAY_CONSTRUCT_COMPACT(1, null, 2)",
- write={
- "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))",
- "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, NULL, 2)",
- },
+ self.assertEqual(
+ # Ensures we don't fail when generating ParseJSON with the `safe` arg set to `True`
+ self.validate_identity("""SELECT TRY_PARSE_JSON('{"x: 1}')""").sql(),
+ """SELECT PARSE_JSON('{"x: 1}')""",
)
expr = parse_one("SELECT APPROX_TOP_K(C4, 3, 5) FROM t")
@@ -49,6 +42,9 @@ WHERE
)""",
)
+ self.validate_identity("exclude := [foo]")
+ self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))")
+ self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias")
self.validate_identity("SELECT number").selects[0].assert_is(exp.Column)
self.validate_identity("INTERVAL '4 years, 5 months, 3 hours'")
self.validate_identity("ALTER TABLE table1 CLUSTER BY (name DESC)")
@@ -84,7 +80,6 @@ WHERE
self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT foo FROM IDENTIFIER('x')")
self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT IDENTIFIER('foo') FROM x")
self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')")
- self.validate_identity("CAST(x AS GEOMETRY)")
self.validate_identity("OBJECT_CONSTRUCT(*)")
self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'")
self.validate_identity("SELECT HLL(*)")
@@ -101,6 +96,22 @@ WHERE
self.validate_identity("ALTER TABLE a SWAP WITH b")
self.validate_identity("SELECT MATCH_CONDITION")
self.validate_identity("SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t")
+ self.validate_identity("1 /* /* */")
+ self.validate_identity(
+ "SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY)) ORDER BY empid"
+ )
self.validate_identity(
"MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)"
)
@@ -114,6 +125,30 @@ WHERE
"SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
)
self.validate_identity(
+ "CAST(x AS GEOGRAPHY)",
+ "TO_GEOGRAPHY(x)",
+ )
+ self.validate_identity(
+ "CAST(x AS GEOMETRY)",
+ "TO_GEOMETRY(x)",
+ )
+ self.validate_identity(
+ "transform(x, a int -> a + a + 1)",
+ "TRANSFORM(x, a -> CAST(a AS INT) + CAST(a AS INT) + 1)",
+ )
+ self.validate_identity(
+ "SELECT * FROM s WHERE c NOT IN (1, 2, 3)",
+ "SELECT * FROM s WHERE NOT c IN (1, 2, 3)",
+ )
+ self.validate_identity(
+ "SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)",
+ "SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)",
+ )
+ self.validate_identity(
+ "SELECT * FROM t1 INNER JOIN t2 USING (t1.col)",
+ "SELECT * FROM t1 INNER JOIN t2 USING (col)",
+ )
+ self.validate_identity(
"CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)",
"CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)",
)
@@ -183,18 +218,6 @@ WHERE
"""SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""",
)
self.validate_identity(
- "SELECT * FROM foo at",
- "SELECT * FROM foo AS at",
- )
- self.validate_identity(
- "SELECT * FROM foo before",
- "SELECT * FROM foo AS before",
- )
- self.validate_identity(
- "SELECT * FROM foo at (col)",
- "SELECT * FROM foo AS at(col)",
- )
- self.validate_identity(
"SELECT * FROM unnest(x) with ordinality",
"SELECT * FROM TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this)",
)
@@ -283,6 +306,13 @@ WHERE
)
self.validate_all(
+ "ARRAY_CONSTRUCT_COMPACT(1, null, 2)",
+ write={
+ "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))",
+ "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, NULL, 2)",
+ },
+ )
+ self.validate_all(
"OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)",
read={
"bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])",
@@ -337,7 +367,7 @@ WHERE
"""SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""",
write={
"bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""",
- "databricks": """SELECT GET_JSON_OBJECT('{"fruit":"banana"}', '$.fruit')""",
+ "databricks": """SELECT '{"fruit":"banana"}':fruit""",
"duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""",
"mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""",
"presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""",
@@ -572,12 +602,12 @@ WHERE
self.validate_all(
"DIV0(foo, bar)",
write={
- "snowflake": "IFF(bar = 0, 0, foo / bar)",
- "sqlite": "IIF(bar = 0, 0, CAST(foo AS REAL) / bar)",
- "presto": "IF(bar = 0, 0, CAST(foo AS DOUBLE) / bar)",
- "spark": "IF(bar = 0, 0, foo / bar)",
- "hive": "IF(bar = 0, 0, foo / bar)",
- "duckdb": "CASE WHEN bar = 0 THEN 0 ELSE foo / bar END",
+ "snowflake": "IFF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "sqlite": "IIF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS REAL) / bar)",
+ "presto": "IF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS DOUBLE) / bar)",
+ "spark": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "hive": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "duckdb": "CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END",
},
)
self.validate_all(
@@ -725,6 +755,8 @@ WHERE
write={
"spark": "SELECT COLLECT_LIST(DISTINCT a)",
"snowflake": "SELECT ARRAY_AGG(DISTINCT a)",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)",
+ "presto": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)",
},
)
self.validate_all(
@@ -831,6 +863,71 @@ WHERE
},
)
+ self.validate_all(
+ "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ write={
+ "snowflake": "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_INSERT({'key5': 'value5'}, key1 := 5), key2 := 2.2), key3 := 'value3')",
+ },
+ )
+
+ self.validate_all(
+ "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ write={
+ "snowflake": "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_PACK(key1 := 5), key2 := 2.2), key3 := 'value3')",
+ },
+ )
+
+ self.validate_identity(
+ """SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""",
+ """SELECT CAST(['foo'] AS VARIANT)[0]""",
+ )
+
+ self.validate_all(
+ "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ write={
+ "snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ },
+ )
+
+ self.validate_all(
+ "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ write={
+ "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')",
+ "duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'",
+ },
+ )
+
+ self.validate_identity(
+ "SELECT UUID_STRING(), UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')"
+ )
+
+ self.validate_all(
+ "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
+ read={
+ "snowflake": "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
+ },
+ write={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ },
+ )
+
def test_null_treatment(self):
self.validate_all(
r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1",
@@ -903,6 +1000,11 @@ WHERE
"SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla",
)
+ self.validate_identity(
+ "SELECT * FROM @test.public.thing/location/somefile.csv( FILE_FORMAT => 'fmt' )",
+ "SELECT * FROM @test.public.thing/location/somefile.csv (FILE_FORMAT => 'fmt')",
+ )
+
def test_sample(self):
self.validate_identity("SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)")
self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)")
@@ -1196,6 +1298,17 @@ WHERE
"SELECT oldt.*, newt.* FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL",
)
+ # Make sure that the historical data keywords can still be used as aliases
+ for historical_data_prefix in ("AT", "BEFORE", "END", "CHANGES"):
+ for schema_suffix in ("", "(col)"):
+ with self.subTest(
+ f"Testing historical data prefix alias: {historical_data_prefix}{schema_suffix}"
+ ):
+ self.validate_identity(
+ f"SELECT * FROM foo {historical_data_prefix}{schema_suffix}",
+ f"SELECT * FROM foo AS {historical_data_prefix}{schema_suffix}",
+ )
+
def test_ddl(self):
for constraint_prefix in ("WITH ", ""):
with self.subTest(f"Constraint prefix: {constraint_prefix}"):
@@ -1216,6 +1329,7 @@ WHERE
"CREATE TABLE t (id INT TAG (key1='value_1', key2='value_2'))",
)
+ self.validate_identity("CREATE SECURE VIEW table1 AS (SELECT a FROM table2)")
self.validate_identity(
"""create external table et2(
col1 date as (parse_json(metadata$external_table_partition):COL1::date),
@@ -1241,6 +1355,9 @@ WHERE
"CREATE OR REPLACE TAG IF NOT EXISTS cost_center COMMENT='cost_center tag'"
).this.assert_is(exp.Identifier)
self.validate_identity(
+ "CREATE DYNAMIC TABLE product (pre_tax_profit, taxes, after_tax_profit) TARGET_LAG='20 minutes' WAREHOUSE=mywh AS SELECT revenue - cost, (revenue - cost) * tax_rate, (revenue - cost) * (1.0 - tax_rate) FROM staging_table"
+ )
+ self.validate_identity(
"ALTER TABLE db_name.schmaName.tblName ADD COLUMN COLUMN_1 VARCHAR NOT NULL TAG (key1='value_1')"
)
self.validate_identity(
@@ -1330,6 +1447,12 @@ WHERE
},
)
+ self.assertIsNotNone(
+ self.validate_identity("CREATE TABLE foo (bar INT AS (foo))").find(
+ exp.TransformColumnConstraint
+ )
+ )
+
def test_user_defined_functions(self):
self.validate_all(
"CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$",
@@ -1608,16 +1731,27 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"REGEXP_SUBSTR(subject, pattern)",
read={
"bigquery": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_EXTRACT(subject, pattern)",
+ },
+ write={
+ "bigquery": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_SUBSTR(subject, pattern)",
+ },
+ )
+ self.validate_all(
+ "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)",
+ read={
"hive": "REGEXP_EXTRACT(subject, pattern)",
- "presto": "REGEXP_EXTRACT(subject, pattern)",
+ "spark2": "REGEXP_EXTRACT(subject, pattern)",
"spark": "REGEXP_EXTRACT(subject, pattern)",
+ "databricks": "REGEXP_EXTRACT(subject, pattern)",
},
write={
- "bigquery": "REGEXP_EXTRACT(subject, pattern)",
"hive": "REGEXP_EXTRACT(subject, pattern)",
- "presto": "REGEXP_EXTRACT(subject, pattern)",
- "snowflake": "REGEXP_SUBSTR(subject, pattern)",
+ "spark2": "REGEXP_EXTRACT(subject, pattern)",
"spark": "REGEXP_EXTRACT(subject, pattern)",
+ "databricks": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)",
},
)
self.validate_all(
@@ -1885,7 +2019,7 @@ STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""",
def test_swap(self):
ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake")
- assert isinstance(ast, exp.AlterTable)
+ assert isinstance(ast, exp.Alter)
assert isinstance(ast.args["actions"][0], exp.SwapTable)
def test_try_cast(self):
@@ -2005,6 +2139,16 @@ SINGLE = TRUE""",
self.validate_identity("SELECT t.$23:a.b", "SELECT GET_PATH(t.$23, 'a.b')")
self.validate_identity("SELECT t.$17:a[0].b[0].c", "SELECT GET_PATH(t.$17, 'a[0].b[0].c')")
+ self.validate_all(
+ """
+ SELECT col:"customer's department"
+ """,
+ write={
+ "snowflake": """SELECT GET_PATH(col, '["customer\\'s department"]')""",
+ "postgres": "SELECT JSON_EXTRACT_PATH(col, 'customer''s department')",
+ },
+ )
+
def test_alter_set_unset(self):
self.validate_identity("ALTER TABLE tbl SET DATA_RETENTION_TIME_IN_DAYS=1")
self.validate_identity("ALTER TABLE tbl SET DEFAULT_DDL_COLLATION='test'")
@@ -2021,3 +2165,32 @@ SINGLE = TRUE""",
self.validate_identity("ALTER TABLE foo UNSET TAG a, b, c")
self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING")
+
+ def test_from_changes(self):
+ self.validate_identity(
+ """SELECT C1 FROM t1 CHANGES (INFORMATION => APPEND_ONLY) AT (STREAM => 's1') END (TIMESTAMP => $ts2)"""
+ )
+ self.validate_identity(
+ """SELECT C1 FROM t1 CHANGES (INFORMATION => APPEND_ONLY) BEFORE (STATEMENT => 'STMT_ID') END (TIMESTAMP => $ts2)"""
+ )
+ self.validate_identity(
+ """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 00:00:00+00:00')) END (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 14:28:59.999999+00:00'))""",
+ """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => CAST('2024-07-01 00:00:00+00:00' AS TIMESTAMPTZ)) END (TIMESTAMP => CAST('2024-07-01 14:28:59.999999+00:00' AS TIMESTAMPTZ))""",
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT SELECT ON FUTURE TABLES IN DATABASE d1 TO ROLE r1",
+ "GRANT INSERT, DELETE ON FUTURE TABLES IN SCHEMA d1.s1 TO ROLE r2",
+ "GRANT SELECT ON ALL TABLES IN SCHEMA mydb.myschema to ROLE analyst",
+ "GRANT SELECT, INSERT ON FUTURE TABLES IN SCHEMA mydb.myschema TO ROLE role1",
+ "GRANT CREATE MATERIALIZED VIEW ON SCHEMA mydb.myschema TO DATABASE ROLE mydb.dr1",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing Snowflake's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity(
+ "GRANT ALL PRIVILEGES ON FUNCTION mydb.myschema.ADD5(number) TO ROLE analyst"
+ )
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
index bff91bf..4fed68c 100644
--- a/tests/dialects/test_spark.py
+++ b/tests/dialects/test_spark.py
@@ -10,6 +10,7 @@ class TestSpark(Validator):
dialect = "spark"
def test_ddl(self):
+ self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2")
self.validate_identity("CREATE TABLE foo AS WITH t AS (SELECT 1 AS col) SELECT col FROM t")
self.validate_identity("CREATE TEMPORARY VIEW test AS SELECT 1")
self.validate_identity("CREATE TABLE foo (col VARCHAR(50))")
@@ -129,6 +130,16 @@ TBLPROPERTIES (
"spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
},
)
+ self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo")
+ self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo")
+ self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed")
+ self.validate_identity(
+ "ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')"
+ )
+ self.validate_identity(
+ "ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')",
+ check_command_warning=True,
+ )
def test_to_date(self):
self.validate_all(
@@ -245,7 +256,7 @@ TBLPROPERTIES (
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)")
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)")
self.validate_identity("REFRESH TABLE a.b.c")
- self.validate_identity("INTERVAL -86 DAYS")
+ self.validate_identity("INTERVAL '-86' DAYS")
self.validate_identity("TRIM(' SparkSQL ')")
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
@@ -298,6 +309,20 @@ TBLPROPERTIES (
)
self.validate_all(
+ "SELECT ARRAY_AGG(x) FILTER (WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ write={
+ "duckdb": "SELECT ARRAY_AGG(x) FILTER(WHERE x = 5 AND NOT x IS NULL) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ "spark": "SELECT COLLECT_LIST(x) FILTER(WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ },
+ )
+ self.validate_all(
+ "SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday",
+ write={
+ "presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday",
+ "spark": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), 'EEEE') AS weekday",
+ },
+ )
+ self.validate_all(
"SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)",
read={
"databricks": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)",
@@ -467,7 +492,7 @@ TBLPROPERTIES (
)
self.validate_all(
"SELECT CAST(STRUCT('fooo') AS STRUCT<a: VARCHAR(2)>)",
- write={"spark": "SELECT CAST(STRUCT('fooo') AS STRUCT<a: STRING>)"},
+ write={"spark": "SELECT CAST(STRUCT('fooo' AS col1) AS STRUCT<a: STRING>)"},
)
self.validate_all(
"SELECT CAST(123456 AS VARCHAR(3))",
@@ -557,7 +582,10 @@ TBLPROPERTIES (
)
self.validate_all(
- "CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"}
+ "CAST(x AS TIMESTAMP)",
+ read={
+ "trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)",
+ },
)
self.validate_all(
"SELECT DATE_ADD(my_date_column, 1)",
@@ -684,6 +712,32 @@ TBLPROPERTIES (
write={
"spark": "SELECT DATE_ADD(MONTH, 20, col)",
"databricks": "SELECT DATE_ADD(MONTH, 20, col)",
+ "presto": "SELECT DATE_ADD('MONTH', 20, col)",
+ "trino": "SELECT DATE_ADD('MONTH', 20, col)",
+ },
+ )
+ self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')")
+
+ self.validate_all(
+ "SELECT ANY_VALUE(col, true), FIRST(col, true), FIRST_VALUE(col, true) OVER ()",
+ write={
+ "duckdb": "SELECT ANY_VALUE(col), FIRST(col), FIRST_VALUE(col IGNORE NULLS) OVER ()"
+ },
+ )
+
+ self.validate_all(
+ "SELECT STRUCT(1, 2)",
+ write={
+ "spark": "SELECT STRUCT(1 AS col1, 2 AS col2)",
+ "presto": "SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))",
+ "duckdb": "SELECT {'col1': 1, 'col2': 2}",
+ },
+ )
+ self.validate_all(
+ "SELECT STRUCT(x, 1, y AS col3, STRUCT(5)) FROM t",
+ write={
+ "spark": "SELECT STRUCT(x AS x, 1 AS col2, y AS col3, STRUCT(5 AS col1) AS col4) FROM t",
+ "duckdb": "SELECT {'x': x, 'col2': 1, 'col3': y, 'col4': {'col1': 5}} FROM t",
},
)
@@ -801,3 +855,24 @@ TBLPROPERTIES (
self.assertEqual(query.sql(name), with_modifiers)
else:
self.assertEqual(query.sql(name), without_modifiers)
+
+ def test_schema_binding_options(self):
+ for schema_binding in (
+ "BINDING",
+ "COMPENSATION",
+ "TYPE EVOLUTION",
+ "EVOLUTION",
+ ):
+ with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"):
+ self.validate_identity(
+ f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp"
+ )
+
+ def test_minus(self):
+ self.validate_all(
+ "SELECT * FROM db.table1 MINUS SELECT * FROM db.table2",
+ write={
+ "spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
+ "databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
+ },
+ )
diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py
index 46bbadc..f2c9802 100644
--- a/tests/dialects/test_sqlite.py
+++ b/tests/dialects/test_sqlite.py
@@ -91,6 +91,10 @@ class TestSQLite(Validator):
read={"snowflake": "LEAST(x, y, z)"},
write={"snowflake": "LEAST(x, y, z)"},
)
+ self.validate_identity(
+ "SELECT * FROM station WHERE city IS NOT ''",
+ "SELECT * FROM station WHERE NOT city IS ''",
+ )
def test_strftime(self):
self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')")
diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py
index 67aabb9..44c54a6 100644
--- a/tests/dialects/test_starrocks.py
+++ b/tests/dialects/test_starrocks.py
@@ -1,12 +1,41 @@
+from sqlglot.errors import UnsupportedError
from tests.dialects.test_dialect import Validator
class TestStarrocks(Validator):
dialect = "starrocks"
+ def test_ddl(self):
+ ddl_sqls = [
+ "DISTRIBUTED BY HASH (col1) BUCKETS 1",
+ "DISTRIBUTED BY HASH (col1)",
+ "DISTRIBUTED BY RANDOM BUCKETS 1",
+ "DISTRIBUTED BY RANDOM",
+ "DISTRIBUTED BY HASH (col1) ORDER BY (col1)",
+ "DISTRIBUTED BY HASH (col1) PROPERTIES ('replication_num'='1')",
+ "PRIMARY KEY (col1) DISTRIBUTED BY HASH (col1)",
+ "DUPLICATE KEY (col1, col2) DISTRIBUTED BY HASH (col1)",
+ ]
+
+ for properties in ddl_sqls:
+            with self.subTest(f"Testing creation scheme: {properties}"):
+ self.validate_identity(f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) {properties}")
+ self.validate_identity(
+ f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) ENGINE=OLAP {properties}"
+ )
+
+ # Test the different wider DECIMAL types
+ self.validate_identity(
+ "CREATE TABLE foo (col0 DECIMAL(9, 1), col1 DECIMAL32(9, 1), col2 DECIMAL64(18, 10), col3 DECIMAL128(38, 10)) DISTRIBUTED BY HASH (col1) BUCKETS 1"
+ )
+
def test_identity(self):
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
self.validate_identity("SELECT APPROX_COUNT_DISTINCT(a) FROM x")
+ self.validate_identity("SELECT [1, 2, 3]")
+ self.validate_identity(
+ """SELECT CAST(PARSE_JSON(fieldvalue) -> '00000000-0000-0000-0000-00000000' AS VARCHAR) AS `code` FROM (SELECT '{"00000000-0000-0000-0000-00000000":"code01"}') AS t(fieldvalue)"""
+ )
def test_time(self):
self.validate_identity("TIMESTAMP('2022-01-01')")
@@ -28,3 +57,61 @@ class TestStarrocks(Validator):
"mysql": "SELECT REGEXP_LIKE(abc, '%foo%')",
},
)
+
+ def test_unnest(self):
+ self.validate_identity(
+ "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t",
+ "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t(unnest)",
+ )
+ self.validate_all(
+ "SELECT student, score, unnest FROM tests CROSS JOIN LATERAL UNNEST(scores)",
+ write={
+ "spark": "SELECT student, score, unnest FROM tests LATERAL VIEW EXPLODE(scores) unnest AS unnest",
+ "starrocks": "SELECT student, score, unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS unnest(unnest)",
+ },
+ )
+ self.validate_all(
+ r"""SELECT * FROM UNNEST(array['John','Jane','Jim','Jamie'], array[24,25,26,27]) AS t(name, age)""",
+ write={
+ "postgres": "SELECT * FROM UNNEST(ARRAY['John', 'Jane', 'Jim', 'Jamie'], ARRAY[24, 25, 26, 27]) AS t(name, age)",
+ "spark": "SELECT * FROM INLINE(ARRAYS_ZIP(ARRAY('John', 'Jane', 'Jim', 'Jamie'), ARRAY(24, 25, 26, 27))) AS t(name, age)",
+ "starrocks": "SELECT * FROM UNNEST(['John', 'Jane', 'Jim', 'Jamie'], [24, 25, 26, 27]) AS t(name, age)",
+ },
+ )
+
+        # Use UNNEST to convert multiple arrays into multiple columns
+ # see: https://docs.starrocks.io/docs/sql-reference/sql-functions/array-functions/unnest/
+ self.validate_all(
+ r"""SELECT id, t.type, t.scores FROM example_table, unnest(split(type, ";"), scores) AS t(type,scores)""",
+ write={
+ "postgres": "SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)",
+ "spark": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "databricks": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "starrocks": r"""SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
+ "hive": UnsupportedError,
+ },
+ )
+
+ self.validate_all(
+ r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL unnest(split(type, ";"), scores) AS t(type,scores)""",
+ write={
+ "spark": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "starrocks": r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
+ "hive": UnsupportedError,
+ },
+ )
+
+ lateral_explode_sqls = [
+ "SELECT id, t.col FROM tbl, UNNEST(scores) AS t(col)",
+ "SELECT id, t.col FROM tbl CROSS JOIN LATERAL UNNEST(scores) AS t(col)",
+ ]
+
+ for sql in lateral_explode_sqls:
+ with self.subTest(f"Testing Starrocks roundtrip & transpilation of: {sql}"):
+ self.validate_all(
+ sql,
+ write={
+ "starrocks": sql,
+ "spark": "SELECT id, t.col FROM tbl LATERAL VIEW EXPLODE(scores) t AS col",
+ },
+ )
diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py
index 74d5f88..466f5d5 100644
--- a/tests/dialects/test_teradata.py
+++ b/tests/dialects/test_teradata.py
@@ -1,3 +1,4 @@
+from sqlglot import exp
from tests.dialects.test_dialect import Validator
@@ -5,6 +6,13 @@ class TestTeradata(Validator):
dialect = "teradata"
def test_teradata(self):
+ self.validate_all(
+ "RANDOM(l, u)",
+ write={
+ "": "(u - l) * RAND() + l",
+ "teradata": "RANDOM(l, u)",
+ },
+ )
self.validate_identity("TO_NUMBER(expr, fmt, nlsparam)")
self.validate_identity("SELECT TOP 10 * FROM tbl")
self.validate_identity("SELECT * FROM tbl SAMPLE 5")
@@ -24,6 +32,10 @@ class TestTeradata(Validator):
},
)
+ self.validate_identity(
+ "RENAME TABLE emp TO employee", check_command_warning=True
+ ).assert_is(exp.Command)
+
def test_translate(self):
self.validate_all(
"TRANSLATE(x USING LATIN_TO_UNICODE)",
@@ -143,6 +155,15 @@ class TestTeradata(Validator):
"tsql": "CREATE TABLE a",
},
)
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS)"
+ )
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS (1, 2, 3))"
+ )
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD' COMPRESS (CAST('9999-09-09' AS DATE)), measurement INT)"
+ )
def test_insert(self):
self.validate_all(
@@ -212,6 +233,8 @@ class TestTeradata(Validator):
)
def test_time(self):
+ self.validate_identity("CAST(CURRENT_TIMESTAMP(6) AS TIMESTAMP WITH TIME ZONE)")
+
self.validate_all(
"CURRENT_TIMESTAMP",
read={
diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py
index ccc1407..0ebe749 100644
--- a/tests/dialects/test_trino.py
+++ b/tests/dialects/test_trino.py
@@ -16,3 +16,35 @@ class TestTrino(Validator):
"SELECT TRIM('!foo!', '!')",
"SELECT TRIM('!' FROM '!foo!')",
)
+
+ def test_ddl(self):
+ self.validate_identity("ALTER TABLE users RENAME TO people")
+ self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people")
+ self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR")
+ self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR")
+ self.validate_identity("ALTER TABLE users DROP COLUMN zip")
+ self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip")
+ self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id")
+ self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id")
+ self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT")
+ self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL")
+ self.validate_identity(
+ "ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES x = DEFAULT", check_command_warning=True
+ )
+ self.validate_identity("ALTER VIEW people RENAME TO users")
+ self.validate_identity(
+ "ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True
+ )
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index 7455650..453cd5a 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -1,4 +1,4 @@
-from sqlglot import exp, parse
+from sqlglot import exp, parse, parse_one
from tests.dialects.test_dialect import Validator
from sqlglot.errors import ParseError
from sqlglot.optimizer.annotate_types import annotate_types
@@ -8,19 +8,14 @@ class TestTSQL(Validator):
dialect = "tsql"
def test_tsql(self):
- self.assertEqual(
- annotate_types(self.validate_identity("SELECT 1 WHERE EXISTS(SELECT 1)")).sql("tsql"),
- "SELECT 1 WHERE EXISTS(SELECT 1)",
- )
+ # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
+ # tsql allows .. which means use the default schema
+ self.validate_identity("SELECT * FROM a..b")
self.validate_identity("CREATE view a.b.c", "CREATE VIEW b.c")
self.validate_identity("DROP view a.b.c", "DROP VIEW b.c")
self.validate_identity("ROUND(x, 1, 0)")
self.validate_identity("EXEC MyProc @id=7, @name='Lochristi'", check_command_warning=True)
- # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
- # tsql allows .. which means use the default schema
- self.validate_identity("SELECT * FROM a..b")
-
self.validate_identity("SELECT TRIM(' test ') AS Result")
self.validate_identity("SELECT TRIM('.,! ' FROM ' # test .') AS Result")
self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
@@ -37,10 +32,36 @@ class TestTSQL(Validator):
self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
self.validate_identity("TRUNCATE TABLE t1 WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))")
self.validate_identity(
+ "SELECT TOP 10 s.RECORDID, n.c.value('(/*:FORM_ROOT/*:SOME_TAG)[1]', 'float') AS SOME_TAG_VALUE FROM source_table.dbo.source_data AS s(nolock) CROSS APPLY FormContent.nodes('/*:FORM_ROOT') AS N(C)"
+ )
+ self.validate_identity(
+ "CREATE CLUSTERED INDEX [IX_OfficeTagDetail_TagDetailID] ON [dbo].[OfficeTagDetail]([TagDetailID] ASC)"
+ )
+ self.validate_identity(
+ "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON X([y])"
+ )
+ self.validate_identity(
+ "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON PRIMARY"
+ )
+ self.validate_identity(
"COPY INTO test_1 FROM 'path' WITH (FORMAT_NAME = test, FILE_TYPE = 'CSV', CREDENTIAL = (IDENTITY='Shared Access Signature', SECRET='token'), FIELDTERMINATOR = ';', ROWTERMINATOR = '0X0A', ENCODING = 'UTF8', DATEFORMAT = 'ymd', MAXERRORS = 10, ERRORFILE = 'errorsfolder', IDENTITY_INSERT = 'ON')"
)
+ self.assertEqual(
+ annotate_types(self.validate_identity("SELECT 1 WHERE EXISTS(SELECT 1)")).sql("tsql"),
+ "SELECT 1 WHERE EXISTS(SELECT 1)",
+ )
self.validate_all(
+ "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
+ read={
+ "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH C AS (WITH A AS (SELECT 2 AS value) SELECT * FROM A) SELECT * FROM C",
+ "tsql": "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
+ },
+ write={
+ "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * FROM (SELECT * FROM C) AS temp",
+ },
+ )
+ self.validate_all(
"SELECT IIF(cond <> 0, 'True', 'False')",
read={
"spark": "SELECT IF(cond, 'True', 'False')",
@@ -383,6 +404,28 @@ class TestTSQL(Validator):
self.validate_identity("HASHBYTES('MD2', 'x')")
self.validate_identity("LOG(n, b)")
+ self.validate_all(
+ "STDEV(x)",
+ read={
+ "": "STDDEV(x)",
+ },
+ write={
+ "": "STDDEV(x)",
+ "tsql": "STDEV(x)",
+ },
+ )
+
+        # Check that TRUE and FALSE don't get expanded to (1=1) or (1=0) when used in a VALUES expression
+ self.validate_identity(
+ "SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)",
+ write_sql="SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)",
+ )
+ self.validate_identity("'a' + 'b'")
+ self.validate_identity(
+ "'a' || 'b'",
+ "'a' + 'b'",
+ )
+
def test_option(self):
possible_options = [
"HASH GROUP",
@@ -763,7 +806,7 @@ class TestTSQL(Validator):
self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x")
self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is(
- exp.AlterTable
+ exp.Alter
).args["actions"][0].assert_is(exp.Drop)
for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"):
@@ -778,6 +821,7 @@ class TestTSQL(Validator):
f"UNIQUE {clustered_keyword} ([internal_id] ASC))",
)
+ self.validate_identity("CREATE VIEW t AS WITH cte AS (SELECT 1 AS c) SELECT c FROM cte")
self.validate_identity(
"ALTER TABLE tbl SET SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS)"
)
@@ -792,6 +836,20 @@ class TestTSQL(Validator):
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=ON")
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=OFF")
+ self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
+ self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
+ self.validate_identity(
+ "ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
self.validate_identity(
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7 END",
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7 END",
@@ -880,6 +938,14 @@ class TestTSQL(Validator):
},
)
+ for colstore in ("NONCLUSTERED COLUMNSTORE", "CLUSTERED COLUMNSTORE"):
+ self.validate_identity(f"CREATE {colstore} INDEX index_name ON foo.bar")
+
+ self.validate_identity(
+ "CREATE COLUMNSTORE INDEX index_name ON foo.bar",
+ "CREATE NONCLUSTERED COLUMNSTORE INDEX index_name ON foo.bar",
+ )
+
def test_insert_cte(self):
self.validate_all(
"INSERT INTO foo.bar WITH cte AS (SELECT 1 AS one) SELECT * FROM cte",
@@ -1108,6 +1174,11 @@ WHERE
self.validate_all("ISNULL(x, y)", write={"spark": "COALESCE(x, y)"})
def test_json(self):
+ self.validate_identity(
+ """JSON_QUERY(REPLACE(REPLACE(x , '''', '"'), '""', '"'))""",
+ """ISNULL(JSON_QUERY(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'), JSON_VALUE(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'))""",
+ )
+
self.validate_all(
"JSON_QUERY(r.JSON, '$.Attr_INT')",
write={
@@ -1470,6 +1541,15 @@ WHERE
},
)
+        # Check that superfluous casts aren't added. ref: https://github.com/TobikoData/sqlmesh/issues/2672
+ self.validate_all(
+ "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
+ write={
+ "tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
+ "clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo",
+ },
+ )
+
def test_lateral_subquery(self):
self.validate_all(
"SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)",
@@ -1511,8 +1591,8 @@ WHERE
self.validate_all(
"SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)",
write={
- "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)",
- "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)",
+ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z)",
+ "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x) AS y(z)",
},
)
@@ -1607,7 +1687,7 @@ WHERE
},
write={
"bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
- "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
+ "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))",
"mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -1622,11 +1702,11 @@ WHERE
"EOMONTH(GETDATE(), -1)",
write={
"bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))",
- "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
+ "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)",
"mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
- "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
+ "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', -1, CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
"redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))",
"snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))",
"spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))",
@@ -1868,3 +1948,60 @@ FROM OPENJSON(@json) WITH (
"DECLARE vendor_cursor CURSOR FOR SELECT VendorID, Name FROM Purchasing.Vendor WHERE PreferredVendorStatus = 1 ORDER BY VendorID",
check_command_warning=True,
)
+
+ def test_scope_resolution_op(self):
+ # we still want to support :: casting shorthand for tsql
+ self.validate_identity("x::int", "CAST(x AS INTEGER)")
+ self.validate_identity("x::varchar", "CAST(x AS VARCHAR)")
+ self.validate_identity("x::varchar(MAX)", "CAST(x AS VARCHAR(MAX))")
+
+ for lhs, rhs in (
+ ("", "FOO(a, b)"),
+ ("bar", "baZ(1, 2)"),
+ ("LOGIN", "EricKurjan"),
+ ("GEOGRAPHY", "Point(latitude, longitude, 4326)"),
+ (
+ "GEOGRAPHY",
+ "STGeomFromText('POLYGON((-122.358 47.653 , -122.348 47.649, -122.348 47.658, -122.358 47.658, -122.358 47.653))', 4326)",
+ ),
+ ):
+ with self.subTest(f"Scope resolution, LHS: {lhs}, RHS: {rhs}"):
+ expr = self.validate_identity(f"{lhs}::{rhs}")
+ base_sql = expr.sql()
+ self.assertEqual(base_sql, f"SCOPE_RESOLUTION({lhs + ', ' if lhs else ''}{rhs})")
+ self.assertEqual(parse_one(base_sql).sql("tsql"), f"{lhs}::{rhs}")
+
+ def test_count(self):
+ count = annotate_types(self.validate_identity("SELECT COUNT(1) FROM x"))
+ self.assertEqual(count.expressions[0].type.this, exp.DataType.Type.INT)
+
+ count_big = annotate_types(self.validate_identity("SELECT COUNT_BIG(1) FROM x"))
+ self.assertEqual(count_big.expressions[0].type.this, exp.DataType.Type.BIGINT)
+
+ self.validate_all(
+ "SELECT COUNT_BIG(1) FROM x",
+ read={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ },
+ write={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ "tsql": "SELECT COUNT_BIG(1) FROM x",
+ },
+ )
+ self.validate_all(
+ "SELECT COUNT(1) FROM x",
+ write={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ "tsql": "SELECT COUNT(1) FROM x",
+ },
+ )
+
+ def test_grant(self):
+ self.validate_identity("GRANT EXECUTE ON TestProc TO User2")
+ self.validate_identity("GRANT EXECUTE ON TestProc TO TesterRole WITH GRANT OPTION")
+ self.validate_identity(
+ "GRANT EXECUTE ON TestProc TO User2 AS TesterRole", check_command_warning=True
+ )