path: root/tests
Diffstat
-rw-r--r--  tests/dialects/test_athena.py | 211
-rw-r--r--  tests/dialects/test_bigquery.py | 349
-rw-r--r--  tests/dialects/test_clickhouse.py | 348
-rw-r--r--  tests/dialects/test_databricks.py | 79
-rw-r--r--  tests/dialects/test_dialect.py | 408
-rw-r--r--  tests/dialects/test_doris.py | 28
-rw-r--r--  tests/dialects/test_duckdb.py | 235
-rw-r--r--  tests/dialects/test_hive.py | 35
-rw-r--r--  tests/dialects/test_mysql.py | 125
-rw-r--r--  tests/dialects/test_oracle.py | 227
-rw-r--r--  tests/dialects/test_postgres.py | 179
-rw-r--r--  tests/dialects/test_presto.py | 126
-rw-r--r--  tests/dialects/test_prql.py | 186
-rw-r--r--  tests/dialects/test_redshift.py | 71
-rw-r--r--  tests/dialects/test_snowflake.py | 245
-rw-r--r--  tests/dialects/test_spark.py | 81
-rw-r--r--  tests/dialects/test_sqlite.py | 4
-rw-r--r--  tests/dialects/test_starrocks.py | 87
-rw-r--r--  tests/dialects/test_teradata.py | 23
-rw-r--r--  tests/dialects/test_trino.py | 32
-rw-r--r--  tests/dialects/test_tsql.py | 167
-rw-r--r--  tests/fixtures/identity.sql | 31
-rw-r--r--  tests/fixtures/optimizer/annotate_types.sql | 33
-rw-r--r--  tests/fixtures/optimizer/canonicalize.sql | 4
-rw-r--r--  tests/fixtures/optimizer/optimizer.sql | 46
-rw-r--r--  tests/fixtures/optimizer/qualify_columns.sql | 61
-rw-r--r--  tests/fixtures/optimizer/qualify_tables.sql | 7
-rw-r--r--  tests/fixtures/optimizer/simplify.sql | 27
-rw-r--r--  tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 180
-rw-r--r--  tests/fixtures/optimizer/tpc-h/tpc-h.sql | 14
-rw-r--r--  tests/fixtures/pretty.sql | 39
-rw-r--r--  tests/test_build.py | 76
-rw-r--r--  tests/test_diff.py | 58
-rw-r--r--  tests/test_executor.py | 17
-rw-r--r--  tests/test_expressions.py | 94
-rw-r--r--  tests/test_generator.py | 4
-rw-r--r--  tests/test_jsonpath.py | 11
-rw-r--r--  tests/test_optimizer.py | 201
-rw-r--r--  tests/test_parser.py | 117
-rw-r--r--  tests/test_schema.py | 4
-rw-r--r--  tests/test_time.py | 22
-rw-r--r--  tests/test_transforms.py | 82
-rw-r--r--  tests/test_transpile.py | 39
43 files changed, 3669 insertions, 744 deletions
diff --git a/tests/dialects/test_athena.py b/tests/dialects/test_athena.py
index 3288ada..ca91d4a 100644
--- a/tests/dialects/test_athena.py
+++ b/tests/dialects/test_athena.py
@@ -1,3 +1,4 @@
+from sqlglot import exp
from tests.dialects.test_dialect import Validator
@@ -7,6 +8,15 @@ class TestAthena(Validator):
def test_athena(self):
self.validate_identity(
+ "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
+ )
+ self.validate_identity(
+ "UNLOAD (SELECT name1, address1, comment1, key1 FROM table1) "
+ "TO 's3://amzn-s3-demo-bucket/ partitioned/' "
+ "WITH (format = 'TEXTFILE', partitioned_by = ARRAY['key1'])",
+ check_command_warning=True,
+ )
+ self.validate_identity(
"""USING EXTERNAL FUNCTION some_function(input VARBINARY)
RETURNS VARCHAR
LAMBDA 'some-name'
@@ -16,5 +26,204 @@ class TestAthena(Validator):
)
self.validate_identity(
- "CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
+ "/* leading comment */CREATE SCHEMA foo",
+ write_sql="/* leading comment */ CREATE SCHEMA `foo`",
+ identify=True,
+ )
+ self.validate_identity(
+ "/* leading comment */SELECT * FROM foo",
+ write_sql='/* leading comment */ SELECT * FROM "foo"',
+ identify=True,
+ )
+
+ def test_ddl(self):
+ # Hive-like, https://docs.aws.amazon.com/athena/latest/ug/create-table.html
+ self.validate_identity("CREATE EXTERNAL TABLE foo (id INT) COMMENT 'test comment'")
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE foo (id INT, val STRING) CLUSTERED BY (id, val) INTO 10 BUCKETS"
+ )
+ self.validate_identity(
+            "CREATE EXTERNAL TABLE foo (id INT, val STRING) STORED AS PARQUET LOCATION 's3://foo' TBLPROPERTIES ('has_encrypted_data'='true', 'classification'='test')"
+ )
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE IF NOT EXISTS foo (a INT, b STRING) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' WITH SERDEPROPERTIES ('case.insensitive'='FALSE') LOCATION 's3://table/path'"
+ )
+ self.validate_identity(
+ """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
+ )
+ self.validate_identity(
+ """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
+ )
+
+ # Iceberg, https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html
+ self.validate_identity(
+ "CREATE TABLE iceberg_table (`id` BIGINT, `data` STRING, category STRING) PARTITIONED BY (category, BUCKET(16, id)) LOCATION 's3://amzn-s3-demo-bucket/your-folder/' TBLPROPERTIES ('table_type'='ICEBERG', 'write_compression'='snappy')"
+ )
+
+        # CTAS goes to the Trino engine, where the table properties can't be enclosed in single quotes like they can be for Hive
+ # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
+ self.validate_identity(
+ "CREATE TABLE foo WITH (table_type='ICEBERG', external_location='s3://foo/') AS SELECT * FROM a"
+ )
+ self.validate_identity(
+ "CREATE TABLE foo AS WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo"
+ )
+
+        # ALTER TABLE ADD COLUMN is not supported; it needs to be generated as ALTER TABLE ADD COLUMNS
+ self.validate_identity(
+ "ALTER TABLE `foo`.`bar` ADD COLUMN `end_ts` BIGINT",
+ write_sql="ALTER TABLE `foo`.`bar` ADD COLUMNS (`end_ts` BIGINT)",
+ )
+
+ def test_dml(self):
+ self.validate_all(
+ "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ read={"": "SELECT CAST(ds AS STRING) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)"},
+ write={
+ "hive": "SELECT CAST(ds AS STRING) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ "trino": "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ "athena": "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)",
+ },
+ )
+
+ def test_ddl_quoting(self):
+ self.validate_identity("CREATE SCHEMA `foo`")
+ self.validate_identity("CREATE SCHEMA foo")
+
+ self.validate_identity("CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'")
+ self.validate_identity("CREATE EXTERNAL TABLE foo (id INT) LOCATION 's3://foo/'")
+ self.validate_identity(
+ "CREATE EXTERNAL TABLE foo (id INT) LOCATION 's3://foo/'",
+ write_sql="CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'",
+ identify=True,
+ )
+
+ self.validate_identity("CREATE TABLE foo AS SELECT * FROM a")
+ self.validate_identity('CREATE TABLE "foo" AS SELECT * FROM "a"')
+ self.validate_identity(
+ "CREATE TABLE `foo` AS SELECT * FROM `a`",
+ write_sql='CREATE TABLE "foo" AS SELECT * FROM "a"',
+ identify=True,
+ )
+
+ self.validate_identity("DROP TABLE `foo`")
+ self.validate_identity("DROP TABLE foo")
+ self.validate_identity("DROP TABLE foo", write_sql="DROP TABLE `foo`", identify=True)
+
+ self.validate_identity('CREATE VIEW "foo" AS SELECT "id" FROM "tbl"')
+ self.validate_identity("CREATE VIEW foo AS SELECT id FROM tbl")
+ self.validate_identity(
+ "CREATE VIEW foo AS SELECT id FROM tbl",
+ write_sql='CREATE VIEW "foo" AS SELECT "id" FROM "tbl"',
+ identify=True,
+ )
+
+ # As a side effect of being able to parse both quote types, we can also fix the quoting on incorrectly quoted source queries
+ self.validate_identity('CREATE SCHEMA "foo"', write_sql="CREATE SCHEMA `foo`")
+ self.validate_identity(
+ 'CREATE EXTERNAL TABLE "foo" ("id" INT) LOCATION \'s3://foo/\'',
+ write_sql="CREATE EXTERNAL TABLE `foo` (`id` INT) LOCATION 's3://foo/'",
+ )
+ self.validate_identity('DROP TABLE "foo"', write_sql="DROP TABLE `foo`")
+ self.validate_identity(
+ 'CREATE VIEW `foo` AS SELECT "id" FROM `tbl`',
+ write_sql='CREATE VIEW "foo" AS SELECT "id" FROM "tbl"',
+ )
+ self.validate_identity(
+ "DROP VIEW IF EXISTS `foo`.`bar`",
+ write_sql='DROP VIEW IF EXISTS "foo"."bar"',
+ )
+
+ self.validate_identity(
+ 'ALTER TABLE "foo" ADD COLUMNS ("id" STRING)',
+ write_sql="ALTER TABLE `foo` ADD COLUMNS (`id` STRING)",
+ )
+ self.validate_identity(
+ 'ALTER TABLE "foo" DROP COLUMN "id"', write_sql="ALTER TABLE `foo` DROP COLUMN `id`"
+ )
+
+ self.validate_identity(
+ 'CREATE TABLE "foo" AS WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"'
+ )
+ self.validate_identity(
+ 'CREATE TABLE `foo` AS WITH `foo` AS (SELECT "a", `b` FROM "bar") SELECT * FROM "foo"',
+ write_sql='CREATE TABLE "foo" AS WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"',
+ )
+
+ self.validate_identity("DESCRIBE foo.bar", write_sql="DESCRIBE `foo`.`bar`", identify=True)
+
+ def test_dml_quoting(self):
+ self.validate_identity("SELECT a AS foo FROM tbl")
+ self.validate_identity('SELECT "a" AS "foo" FROM "tbl"')
+ self.validate_identity(
+ 'SELECT `a` AS `foo` FROM "tbl"',
+ write_sql='SELECT "a" AS "foo" FROM "tbl"',
+ identify=True,
+ )
+
+ self.validate_identity("INSERT INTO foo (id) VALUES (1)")
+ self.validate_identity('INSERT INTO "foo" ("id") VALUES (1)')
+ self.validate_identity(
+ 'INSERT INTO `foo` ("id") VALUES (1)',
+ write_sql='INSERT INTO "foo" ("id") VALUES (1)',
+ identify=True,
+ )
+
+ self.validate_identity("UPDATE foo SET id = 3 WHERE id = 7")
+ self.validate_identity('UPDATE "foo" SET "id" = 3 WHERE "id" = 7')
+ self.validate_identity(
+ 'UPDATE `foo` SET "id" = 3 WHERE `id` = 7',
+ write_sql='UPDATE "foo" SET "id" = 3 WHERE "id" = 7',
+ identify=True,
+ )
+
+ self.validate_identity("DELETE FROM foo WHERE id > 10")
+ self.validate_identity('DELETE FROM "foo" WHERE "id" > 10')
+ self.validate_identity(
+ "DELETE FROM `foo` WHERE `id` > 10",
+ write_sql='DELETE FROM "foo" WHERE "id" > 10',
+ identify=True,
+ )
+
+ self.validate_identity("WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo")
+ self.validate_identity(
+ "WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo",
+ write_sql='WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"',
+ identify=True,
+ )
+
+ def test_ctas(self):
+        # Hive tables use 'external_location' to specify the table location, while Iceberg tables use 'location'
+ # The 'table_type' property is used to determine if it's a Hive or an Iceberg table
+ # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
+ ctas_hive = exp.Create(
+ this=exp.to_table("foo.bar"),
+ kind="TABLE",
+ properties=exp.Properties(
+ expressions=[
+ exp.FileFormatProperty(this=exp.Literal.string("parquet")),
+ exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+ ]
+ ),
+ expression=exp.select("1"),
+ )
+ self.assertEqual(
+ ctas_hive.sql(dialect=self.dialect, identify=True),
+ "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo') AS SELECT 1",
+ )
+
+ ctas_iceberg = exp.Create(
+ this=exp.to_table("foo.bar"),
+ kind="TABLE",
+ properties=exp.Properties(
+ expressions=[
+ exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
+ exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+ ]
+ ),
+ expression=exp.select("1"),
+ )
+ self.assertEqual(
+ ctas_iceberg.sql(dialect=self.dialect, identify=True),
+ "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo') AS SELECT 1",
)
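
A minimal sketch of what the new Athena quoting tests above exercise, assuming a sqlglot build that includes this change: backtick identifiers are kept for Hive-engine DDL, ANSI double quotes are used for Trino-engine statements, and mis-quoted input is repaired on output.

    import sqlglot

    # Hive-engine DDL: double-quoted input is re-emitted with backticks.
    print(sqlglot.transpile('DROP TABLE "foo"', read="athena", write="athena")[0])
    # expected per the tests above: DROP TABLE `foo`

    # Trino-engine statements (views, CTAS, DML): backticks become double quotes.
    print(sqlglot.transpile('CREATE VIEW `foo` AS SELECT "id" FROM `tbl`', read="athena", write="athena")[0])
    # expected per the tests above: CREATE VIEW "foo" AS SELECT "id" FROM "tbl"
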
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index ae8ed16..e2adfea 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -1,4 +1,6 @@
from unittest import mock
+import datetime
+import pytz
from sqlglot import (
ErrorLevel,
@@ -20,6 +22,8 @@ class TestBigQuery(Validator):
maxDiff = None
def test_bigquery(self):
+ self.validate_identity("REGEXP_EXTRACT(x, '(?<)')")
+
self.validate_all(
"EXTRACT(HOUR FROM DATETIME(2008, 12, 25, 15, 30, 00))",
write={
@@ -103,8 +107,9 @@ LANGUAGE js AS
select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`")
self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF")
+ self.validate_identity("SELECT * FROM READ_CSV('bla.csv')")
+ self.validate_identity("CAST(x AS STRUCT<list ARRAY<INT64>>)")
self.validate_identity("assert.true(1 = 1)")
- self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text")
self.validate_identity("SELECT jsondoc['some_key']")
self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`")
self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`")
@@ -137,7 +142,6 @@ LANGUAGE js AS
self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day")
self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')")
self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')")
- self.validate_identity("CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)")
self.validate_identity("STRING_AGG(a)")
self.validate_identity("STRING_AGG(a, ' & ')")
self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
@@ -161,12 +165,9 @@ LANGUAGE js AS
self.validate_identity("x <> ''")
self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
self.validate_identity("SELECT b'abc'")
- self.validate_identity("""SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])""")
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
- self.validate_identity("SELECT STRUCT<ARRAY<STRING>>(['2023-01-17'])")
- self.validate_identity("SELECT STRUCT<STRING>((SELECT a FROM b.c LIMIT 1)).*")
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
self.validate_identity("""CREATE TABLE x (a STRUCT<values ARRAY<INT64>>)""")
self.validate_identity("""CREATE TABLE x (a STRUCT<b STRING OPTIONS (description='b')>)""")
@@ -193,6 +194,9 @@ LANGUAGE js AS
self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)")
self.validate_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)")
self.validate_identity(
+ "MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE"
+ )
+ self.validate_identity(
"SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`"
)
self.validate_identity(
@@ -292,8 +296,26 @@ LANGUAGE js AS
r"REGEXP_EXTRACT(svc_plugin_output, r'\\\((.*)')",
r"REGEXP_EXTRACT(svc_plugin_output, '\\\\\\((.*)')",
)
+ self.validate_identity(
+ "SELECT CAST(1 AS BYTEINT)",
+ "SELECT CAST(1 AS INT64)",
+ )
self.validate_all(
+ "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')",
+ write={
+ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')",
+ "duckdb": "CAST(TRY_STRPTIME(some_date, '%d %B %Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date",
+ write={
+ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date",
+ "duckdb": "CAST(TRY_STRPTIME(some_date, '%Y-%m-%d') AS DATE) AS some_date",
+ },
+ )
+ self.validate_all(
"SELECT t.c1, h.c2, s.c3 FROM t1 AS t, UNNEST(t.t2) AS h, UNNEST(h.t3) AS s",
write={
"bigquery": "SELECT t.c1, h.c2, s.c3 FROM t1 AS t, UNNEST(t.t2) AS h, UNNEST(h.t3) AS s",
@@ -352,7 +374,7 @@ LANGUAGE js AS
},
write={
"bigquery": "SELECT SUM(x IGNORE NULLS) AS x",
- "duckdb": "SELECT SUM(x IGNORE NULLS) AS x",
+ "duckdb": "SELECT SUM(x) AS x",
"postgres": "SELECT SUM(x) IGNORE NULLS AS x",
"spark": "SELECT SUM(x) IGNORE NULLS AS x",
"snowflake": "SELECT SUM(x) IGNORE NULLS AS x",
@@ -387,7 +409,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
- "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x",
},
)
@@ -395,7 +417,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
- "duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x",
},
)
@@ -431,7 +453,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
- "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
+ "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))",
"mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -496,6 +518,20 @@ LANGUAGE js AS
},
)
self.validate_all(
+ "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')",
+ write={
+ "bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))",
+ "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')",
+ },
+ )
+ self.validate_all(
+ "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
+ write={
+ "bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
+ "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')",
+ },
+ )
+ self.validate_all(
"SELECT COUNTIF(x)",
read={
"clickhouse": "SELECT countIf(x)",
@@ -599,8 +635,9 @@ LANGUAGE js AS
self.validate_all(
"SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
- "bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
- "databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1, '2023-01-01T00:00:00')",
+ "bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
+ "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
+ "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) + INTERVAL '1' MILLISECOND",
},
),
)
@@ -608,8 +645,9 @@ LANGUAGE js AS
self.validate_all(
"SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
write={
- "bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)",
- "databricks": "SELECT TIMESTAMPADD(MILLISECOND, 1 * -1, '2023-01-01T00:00:00')",
+ "bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
+ "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1' * -1, '2023-01-01T00:00:00')",
+ "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) - INTERVAL '1' MILLISECOND",
},
),
)
@@ -619,6 +657,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
"databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')",
+ "duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))",
},
),
)
@@ -626,17 +665,24 @@ LANGUAGE js AS
self.validate_all(
'SELECT TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
- "bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
- "databricks": "SELECT DATE_ADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
- "mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
- "spark": "SELECT DATE_ADD(MINUTE, 10, CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
+ "bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)",
+ "databricks": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
+ "mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
+ "spark": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
},
)
self.validate_all(
'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)',
write={
- "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL 10 MINUTE)",
- "mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL 10 MINUTE)",
+ "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)",
+ "mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
+ },
+ )
+ self.validate_all(
+ "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
+ write={
+ "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL '2' HOUR)",
+ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
},
)
self.validate_all(
@@ -779,6 +825,7 @@ LANGUAGE js AS
"presto": "SHA256(x)",
"redshift": "SHA2(x, 256)",
"trino": "SHA256(x)",
+ "duckdb": "SHA256(x)",
},
)
self.validate_all(
@@ -1005,7 +1052,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS _t0(x)",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS _t0(x)",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS _t0(x)",
},
)
self.validate_all(
@@ -1192,29 +1239,28 @@ LANGUAGE js AS
"SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
write={
"bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
- "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))",
- "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
- "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
+ "hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
+ "spark": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
},
)
self.validate_all(
"DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)",
write={
"postgres": "CURRENT_DATE - INTERVAL '1 DAY'",
- "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)",
+ "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL '1' DAY)",
},
)
self.validate_all(
- "DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)",
+ "DATE_ADD(CURRENT_DATE(), INTERVAL -1 DAY)",
write={
- "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
- "duckdb": "CURRENT_DATE + INTERVAL 1 DAY",
- "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
- "postgres": "CURRENT_DATE + INTERVAL '1 DAY'",
- "presto": "DATE_ADD('DAY', 1, CURRENT_DATE)",
- "hive": "DATE_ADD(CURRENT_DATE, 1)",
- "spark": "DATE_ADD(CURRENT_DATE, 1)",
+ "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)",
+ "duckdb": "CURRENT_DATE + INTERVAL '-1' DAY",
+ "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)",
+ "postgres": "CURRENT_DATE + INTERVAL '-1 DAY'",
+ "presto": "DATE_ADD('DAY', CAST('-1' AS BIGINT), CURRENT_DATE)",
+ "hive": "DATE_ADD(CURRENT_DATE, '-1')",
+ "spark": "DATE_ADD(CURRENT_DATE, '-1')",
},
)
self.validate_all(
@@ -1240,6 +1286,13 @@ LANGUAGE js AS
},
)
self.validate_all(
+ "DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
+ write={
+ "bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
+ "duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))",
+ },
+ )
+ self.validate_all(
"CURRENT_DATE('UTC')",
write={
"mysql": "CURRENT_DATE AT TIME ZONE 'UTC'",
@@ -1345,6 +1398,113 @@ WHERE
"bigquery": "SELECT CAST(x AS DATETIME)",
},
)
+ self.validate_all(
+ "SELECT TIME(foo, 'America/Los_Angeles')",
+ write={
+ "duckdb": "SELECT CAST(CAST(foo AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIME)",
+ "bigquery": "SELECT TIME(foo, 'America/Los_Angeles')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01')",
+ write={
+ "duckdb": "SELECT CAST('2020-01-01' AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01', TIME '23:59:59')",
+ write={
+ "duckdb": "SELECT CAST(CAST('2020-01-01' AS DATE) + CAST('23:59:59' AS TIME) AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01', CAST('23:59:59' AS TIME))",
+ },
+ )
+ self.validate_all(
+ "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')",
+ write={
+ "duckdb": "SELECT CAST(CAST('2020-01-01' AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIMESTAMP)",
+ "bigquery": "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')",
+ },
+ )
+ self.validate_all(
+ "SELECT LENGTH(foo)",
+ read={
+ "bigquery": "SELECT LENGTH(foo)",
+ "snowflake": "SELECT LENGTH(foo)",
+ },
+ write={
+ "duckdb": "SELECT CASE TYPEOF(foo) WHEN 'VARCHAR' THEN LENGTH(CAST(foo AS TEXT)) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) END",
+ "snowflake": "SELECT LENGTH(foo)",
+ "": "SELECT LENGTH(foo)",
+ },
+ )
+ self.validate_all(
+ "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
+ write={
+ "duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))",
+ "bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
+ },
+ )
+ self.validate_all(
+ "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
+ write={
+ "bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
+ "duckdb": "ARRAY_CONCAT([1, 2], ARRAY_CONCAT([3, 4], [5, 6]))",
+ "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))",
+ "redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))",
+ "snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))",
+ "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
+ "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
+ "trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
+ write={
+ "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
+ "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
+ write={
+ "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])",
+ "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
+ },
+ )
+ self.validate_all(
+ "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
+ write={
+ "duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)",
+ "bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
+ },
+ )
+ self.validate_all(
+ "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
+ write={
+ "bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
+ "duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ write={
+ "bigquery": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ "duckdb": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
+ },
+ )
+ self.validate_all(
+ "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text",
+ write={
+ "bigquery": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text",
+ "duckdb": "SELECT ARRAY_TO_STRING(LIST_TRANSFORM(['cake', 'pie', NULL], x -> COALESCE(x, 'MISSING')), '--') AS text",
+ },
+ )
+
+ self.validate_identity("SELECT * FROM a-b c", "SELECT * FROM a-b AS c")
def test_errors(self):
with self.assertRaises(TokenError):
@@ -1372,6 +1532,12 @@ WHERE
transpile("DATE_ADD(x, day)", read="bigquery")
def test_warnings(self):
+ with self.assertLogs(parser_logger) as cm:
+ self.validate_identity(
+ "/* some comment */ DECLARE foo DATE DEFAULT DATE_SUB(current_date, INTERVAL 2 day)"
+ )
+ self.assertIn("contains unsupported syntax", cm.output[0])
+
with self.assertLogs(helper_logger) as cm:
self.validate_identity(
"WITH cte(c) AS (SELECT * FROM t) SELECT * FROM cte",
@@ -1552,7 +1718,7 @@ WHERE
"SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'null')], origin => CAST('2023-11-01 09:30:01' AS DATETIME)) ORDER BY time"
)
self.validate_identity(
- "SELECT * FROM GAP_FILL(TABLE (SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(2, CAST('2023-11-01 09:36:00' AS DATETIME), 77, 'ACTIVE'), STRUCT(3, CAST('2023-11-01 09:37:00' AS DATETIME), 78, 'ACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')])), ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'linear')]) ORDER BY time"
+ "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'locf')]) ORDER BY time"
)
def test_models(self):
@@ -1702,3 +1868,120 @@ OPTIONS (
"MOD((a + 1), b)",
"MOD(a + 1, b)",
)
+
+ def test_inline_constructor(self):
+ self.validate_identity(
+ """SELECT STRUCT<ARRAY<STRING>>(["2023-01-17"])""",
+ """SELECT CAST(STRUCT(['2023-01-17']) AS STRUCT<ARRAY<STRING>>)""",
+ )
+ self.validate_identity(
+ """SELECT STRUCT<STRING>((SELECT 'foo')).*""",
+ """SELECT CAST(STRUCT((SELECT 'foo')) AS STRUCT<STRING>).*""",
+ )
+
+ self.validate_all(
+ "SELECT ARRAY<INT>[1, 2, 3]",
+ write={
+ "bigquery": "SELECT CAST([1, 2, 3] AS ARRAY<INT64>)",
+ "duckdb": "SELECT CAST([1, 2, 3] AS INT[])",
+ },
+ )
+ self.validate_all(
+ "CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)",
+ write={
+ "bigquery": "CAST(CAST(STRUCT(1) AS STRUCT<a INT64>) AS STRUCT<a INT64>)",
+ "duckdb": "CAST(CAST(ROW(1) AS STRUCT(a BIGINT)) AS STRUCT(a BIGINT))",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY<STRUCT<x INT64>>))",
+ "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>))",
+ "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT STRUCT<a INT64, b STRUCT<c STRING>>(1, STRUCT('c_str'))",
+ write={
+ "bigquery": "SELECT CAST(STRUCT(1, STRUCT('c_str')) AS STRUCT<a INT64, b STRUCT<c STRING>>)",
+ "duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))",
+ },
+ )
+
+ def test_convert(self):
+ for value, expected in [
+ (datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"),
+ (datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"),
+ (
+ datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc),
+ "CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)",
+ ),
+ (
+ pytz.timezone("America/Los_Angeles").localize(
+ datetime.datetime(2023, 1, 1, 12, 13, 14)
+ ),
+ "CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ ),
+ ]:
+ with self.subTest(value):
+ self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected)
+
+ def test_unnest(self):
+ self.validate_all(
+ "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
+ write={
+ "bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
+ "duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
+ write={
+ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
+ "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))",
+ },
+ )
+ self.validate_all(
+ "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ write={
+ "bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ "duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
+ },
+ )
+ self.validate_all(
+ "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ write={
+ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
+ "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
+ },
+ )
+
+ def test_range_type(self):
+ for type, value in (
+ ("RANGE<DATE>", "'[2020-01-01, 2020-12-31)'"),
+ ("RANGE<DATE>", "'[UNBOUNDED, 2020-12-31)'"),
+ ("RANGE<DATETIME>", "'[2020-01-01 12:00:00, 2020-12-31 12:00:00)'"),
+ ("RANGE<TIMESTAMP>", "'[2020-10-01 12:00:00+08, 2020-12-31 12:00:00+08)'"),
+ ):
+ with self.subTest(f"Testing BigQuery's RANGE<T> type: {type} {value}"):
+ self.validate_identity(f"SELECT {type} {value}", f"SELECT CAST({value} AS {type})")
+
+ self.assertEqual(self.parse_one(type), exp.DataType.build(type, dialect="bigquery"))
+
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-12-01' AS DATE), CAST('2022-12-31' AS DATE))"
+ )
+ self.validate_identity("SELECT RANGE(NULL, CAST('2022-12-31' AS DATE))")
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-10-01 14:53:27' AS DATETIME), CAST('2022-10-01 16:00:00' AS DATETIME))"
+ )
+ self.validate_identity(
+ "SELECT RANGE(CAST('2022-10-01 14:53:27 America/Los_Angeles' AS TIMESTAMP), CAST('2022-10-01 16:00:00 America/Los_Angeles' AS TIMESTAMP))"
+ )
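
A minimal sketch of the BigQuery inline-constructor handling covered by test_inline_constructor above, assuming a sqlglot build with this change: typed ARRAY/STRUCT constructors are represented as explicit casts, which also lets them transpile to DuckDB.

    import sqlglot

    sql = "SELECT ARRAY<INT>[1, 2, 3]"

    # Round-tripping through BigQuery rewrites the typed constructor as a cast.
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
    # expected per the tests above: SELECT CAST([1, 2, 3] AS ARRAY<INT64>)

    # The same expression transpiled to DuckDB.
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # expected per the tests above: SELECT CAST([1, 2, 3] AS INT[])
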
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index 72634a8..b4fc587 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -1,4 +1,7 @@
+from datetime import date
from sqlglot import exp, parse_one
+from sqlglot.dialects import ClickHouse
+from sqlglot.expressions import convert
from tests.dialects.test_dialect import Validator
from sqlglot.errors import ErrorLevel
@@ -7,41 +10,33 @@ class TestClickhouse(Validator):
dialect = "clickhouse"
def test_clickhouse(self):
- self.validate_all(
- "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
- write={
- "": "SELECT * FROM x WHERE z = 2",
- "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
- },
- )
- self.validate_all(
- "SELECT * FROM x AS prewhere",
- read={
- "clickhouse": "SELECT * FROM x AS prewhere",
- "duckdb": "SELECT * FROM x prewhere",
- },
- )
-
- self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
+ for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING:
+ self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)")
-        string_types = [
-            "BLOB",
-            "LONGBLOB",
-            "LONGTEXT",
-            "MEDIUMBLOB",
-            "MEDIUMTEXT",
-            "TINYBLOB",
-            "TINYTEXT",
-            "VARCHAR(255)",
-        ]
+ # Arrays, maps and tuples can't be Nullable in ClickHouse
+ for non_nullable_type in ("ARRAY<INT>", "MAP<INT, INT>", "STRUCT(a: INT)"):
+ try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})")
+ target_type = try_cast.to.sql("clickhouse")
+ self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})")
-        for string_type in string_types:
-            self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)")
+ for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"):
+ try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})")
+ target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse")
+ self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))")
expr = parse_one("count(x)")
self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
self.assertIsNone(expr._meta)
+ self.validate_identity("SELECT toString(CHAR(104.1, 101, 108.9, 108.9, 111, 32))")
+ self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var)
+ self.validate_identity("SELECT toFloat(like)")
+ self.validate_identity("SELECT like")
+ self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)")
+ self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')")
+ self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))")
+ self.validate_identity("extract(haystack, pattern)")
+ self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
self.validate_identity("SELECT CAST(x AS Tuple(String, Array(Nullable(Float64))))")
self.validate_identity("countIf(x, y)")
self.validate_identity("x = y")
@@ -49,7 +44,7 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)")
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)")
self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000")
- self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))")
+ self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))")
self.validate_identity("CAST(x AS Enum('hello', 'world'))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))")
@@ -88,23 +83,21 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)")
self.validate_identity("position(haystack, needle)")
self.validate_identity("position(haystack, needle, position)")
- self.validate_identity("CAST(x AS DATETIME)")
+ self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)")
+ self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src")
self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""")
self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b")
self.validate_identity("SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b")
+ self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
+ self.validate_identity("TRUNCATE DATABASE db")
+ self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
self.validate_identity(
- "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table"
- )
-
- self.validate_identity(
- "SELECT $1$foo$1$",
- "SELECT 'foo'",
+ "SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) WINDOW window_name AS (PARTITION BY number) QUALIFY partition_count = 4 ORDER BY number"
)
self.validate_identity(
- "SELECT * FROM table LIMIT 1, 2 BY a, b",
- "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b",
+ "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table"
)
self.validate_identity(
"SELECT * FROM table LIMIT 1 BY CONCAT(datalayerVariantNo, datalayerProductId, warehouse)"
@@ -134,10 +127,6 @@ class TestClickhouse(Validator):
"SELECT sum(1) AS impressions, (arrayJoin(arrayZip(cities, browsers)) AS t).1 AS city, t.2 AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3"
)
self.validate_identity(
- "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']",
- "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')",
- )
- self.validate_identity(
'SELECT CAST(tuple(1 AS "a", 2 AS "b", 3.0 AS "c").2 AS Nullable(String))'
)
self.validate_identity(
@@ -150,17 +139,80 @@ class TestClickhouse(Validator):
"CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple() AS SELECT * FROM test_data"
)
self.validate_identity(
- "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) TO table1 AS SELECT * FROM test_data"
+ "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 TO table1 AS SELECT * FROM test_data"
)
self.validate_identity(
- "CREATE MATERIALIZED VIEW test_view (id UInt8) TO db.table1 AS SELECT * FROM test_data"
+ "CREATE MATERIALIZED VIEW test_view TO db.table1 (id UInt8) AS SELECT * FROM test_data"
)
- self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
- self.validate_identity("TRUNCATE DATABASE db")
- self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
self.validate_identity(
"CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)"
)
+ self.validate_identity(
+ "SELECT $1$foo$1$",
+ "SELECT 'foo'",
+ )
+ self.validate_identity(
+ "SELECT * FROM table LIMIT 1, 2 BY a, b",
+ "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b",
+ )
+ self.validate_identity(
+ "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']",
+ "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')",
+ )
+
+ self.validate_all(
+ "CHAR(67) || CHAR(65) || CHAR(84)",
+ read={
+ "clickhouse": "CHAR(67) || CHAR(65) || CHAR(84)",
+ "oracle": "CHR(67) || CHR(65) || CHR(84)",
+ },
+ )
+ self.validate_all(
+ "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ read={
+ "clickhouse": "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ "oracle": "SELECT LAG(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ },
+ )
+ self.validate_all(
+ "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ read={
+ "clickhouse": "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ "oracle": "SELECT LEAD(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ read={
+ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ "postgres": "SELECT TO_DATE('05 12 2000', 'DD MM YYYY')",
+ },
+ write={
+ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)",
+ "postgres": "SELECT CAST(CAST(TO_DATE('05 12 2000', 'DD MM YYYY') AS TIMESTAMP) AS DATE)",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
+ write={
+ "": "SELECT * FROM x WHERE z = 2",
+ "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM x AS prewhere",
+ read={
+ "clickhouse": "SELECT * FROM x AS prewhere",
+ "duckdb": "SELECT * FROM x prewhere",
+ },
+ )
+ self.validate_all(
+ "SELECT a, b FROM (SELECT * FROM x) AS t",
+ read={
+ "clickhouse": "SELECT a, b FROM (SELECT * FROM x) AS t",
+ "duckdb": "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)",
+ },
+ )
self.validate_all(
"SELECT arrayJoin([1,2,3])",
write={
@@ -181,11 +233,16 @@ class TestClickhouse(Validator):
},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND",
+ "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
read={
"duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
"postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
},
+ write={
+ "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
+ "duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND",
+ "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'",
+ },
)
self.validate_all(
"SELECT CURRENT_DATE()",
@@ -413,15 +470,15 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT FORMAT")
self.validate_identity("1 AS FORMAT").assert_is(exp.Alias)
- self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d', '%T')")
+ self.validate_identity("SELECT formatDateTime(NOW(), '%Y-%m-%d', '%T')")
self.validate_all(
- "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+ "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
read={
"clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
"mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
},
write={
- "clickhouse": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
+ "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')",
"mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')",
},
)
@@ -440,6 +497,66 @@ class TestClickhouse(Validator):
)
self.validate_identity("ALTER TABLE visits REPLACE PARTITION ID '201901' FROM visits_tmp")
self.validate_identity("ALTER TABLE visits ON CLUSTER test_cluster DROP COLUMN col1")
+ self.validate_identity("DELETE FROM tbl ON CLUSTER test_cluster WHERE date = '2019-01-01'")
+
+ self.assertIsInstance(
+ parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType
+ )
+
+ self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
+ self.validate_all(
+ "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ read={
+                # looks like the VALUES table function, but it should be parsed as a VALUES block
+ "clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)"
+ },
+ write={
+ "clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ "postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ },
+ )
+ self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')")
+ self.validate_identity("SELECT TRIM(LEADING '(' FROM '( Hello, world! )')")
+ self.validate_identity("current_timestamp").assert_is(exp.Column)
+
+ self.validate_identity("SELECT * APPLY(sum) FROM columns_transformers")
+ self.validate_identity("SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers")
+ self.validate_identity(
+ "SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers"
+ )
+ self.validate_identity("SELECT * APPLY(sum), COLUMNS('col') APPLY(sum) APPLY(avg) FROM t")
+ self.validate_identity(
+ "SELECT * FROM ABC WHERE hasAny(COLUMNS('.*field') APPLY(toUInt64) APPLY(to), (SELECT groupUniqArray(toUInt64(field))))"
+ )
+ self.validate_identity("SELECT col apply", "SELECT col AS apply")
+
+ def test_clickhouse_values(self):
+ values = exp.select("*").from_(
+ exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"])
+ )
+ self.assertEqual(
+ values.sql("clickhouse"),
+ "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq",
+ )
+
+ self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
+ self.validate_identity(
+ "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)",
+ "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
+ )
+
+ self.validate_all(
+ "SELECT col FROM (SELECT 1 AS col) AS _t",
+ read={
+ "duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)",
+ },
+ )
+ self.validate_all(
+ "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t",
+ read={
+ "duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)",
+ },
+ )
def test_cte(self):
self.validate_identity("WITH 'x' AS foo SELECT foo")
@@ -501,7 +618,7 @@ class TestClickhouse(Validator):
self.validate_all(
"SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
write={
- "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
+ "clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
"": "SELECT :abc, :b, :c, :d, :e",
},
)
@@ -532,14 +649,87 @@ class TestClickhouse(Validator):
)
def test_ddl(self):
+ db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
+ create_with_cluster = exp.Create(
+ this=db_table_expr,
+ kind="DATABASE",
+ properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
+ )
+ self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
+
+ # Transpiled CREATE SCHEMA may have OnCluster property set
+ create_with_cluster = exp.Create(
+ this=db_table_expr,
+ kind="SCHEMA",
+ properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
+ )
+ self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
+
+ ctas_with_comment = exp.Create(
+ this=exp.table_("foo"),
+ kind="TABLE",
+ expression=exp.select("*").from_("db.other_table"),
+ properties=exp.Properties(
+ expressions=[
+ exp.EngineProperty(this=exp.var("Memory")),
+ exp.SchemaCommentProperty(this=exp.Literal.string("foo")),
+ ],
+ ),
+ )
+ self.assertEqual(
+ ctas_with_comment.sql("clickhouse"),
+ "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'",
+ )
+
+ self.validate_identity("CREATE MATERIALIZED VIEW a.b TO a.c (c Int32) AS SELECT * FROM a.d")
+ self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
+ self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
+ self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))")
+ self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""")
+ self.validate_identity(
+ "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
+ )
+ self.validate_identity(
+ "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
+ )
+ self.validate_identity(
+ """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'"""
+ )
+ self.validate_identity(
+ "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3"
+ )
self.validate_identity(
'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)'
)
self.validate_identity(
"CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'"
)
+ self.validate_identity(
+ "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
+ "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
+ )
self.validate_all(
+ "CREATE DATABASE x",
+ read={
+ "duckdb": "CREATE SCHEMA x",
+ },
+ write={
+ "clickhouse": "CREATE DATABASE x",
+ "duckdb": "CREATE SCHEMA x",
+ },
+ )
+ self.validate_all(
+ "DROP DATABASE x",
+ read={
+ "duckdb": "DROP SCHEMA x",
+ },
+ write={
+ "clickhouse": "DROP DATABASE x",
+ "duckdb": "DROP SCHEMA x",
+ },
+ )
+ self.validate_all(
"""
CREATE TABLE example1 (
timestamp DateTime,
@@ -552,7 +742,7 @@ class TestClickhouse(Validator):
""",
write={
"clickhouse": """CREATE TABLE example1 (
- timestamp DATETIME,
+ timestamp DateTime,
x UInt32 TTL now() + INTERVAL '1' MONTH,
y String TTL timestamp + INTERVAL '1' DAY,
z String
@@ -630,7 +820,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE example_table (
- d DATETIME,
+ d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -657,7 +847,7 @@ TTL
""",
write={
"clickhouse": """CREATE TABLE table_with_where (
- d DATETIME,
+ d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -685,7 +875,7 @@ WHERE
""",
write={
"clickhouse": """CREATE TABLE table_for_recompression (
- d DATETIME,
+ d DateTime,
key UInt64,
value String
)
@@ -717,7 +907,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE table_for_aggregation (
- d DATETIME,
+ d DateTime,
k1 Int32,
k2 Int32,
x Int32,
@@ -824,8 +1014,6 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
- self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
- self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
self.validate_all(
"""
CREATE TABLE t (
@@ -842,11 +1030,11 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
- self.validate_identity(
- "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
- )
- self.validate_identity(
- "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
+
+ self.assertIsNotNone(
+ self.validate_identity("CREATE TABLE t1 (a String MATERIALIZED func())").find(
+ exp.ColumnConstraint
+ )
)
def test_agg_functions(self):
@@ -880,3 +1068,35 @@ LIFETIME(MIN 0 MAX 0)""",
for creatable in ("DATABASE", "TABLE", "VIEW", "DICTIONARY", "FUNCTION"):
with self.subTest(f"Test DROP {creatable} ON CLUSTER"):
self.validate_identity(f"DROP {creatable} test ON CLUSTER test_cluster")
+
+ def test_datetime_funcs(self):
+ # Each datetime func has an alias that is roundtripped to the original name e.g. (DATE_SUB, DATESUB) -> DATE_SUB
+ datetime_funcs = (("DATE_SUB", "DATESUB"), ("DATE_ADD", "DATEADD"))
+
+ # 2-arg functions of type <func>(date, unit)
+ for func in (*datetime_funcs, ("TIMESTAMP_ADD", "TIMESTAMPADD")):
+ func_name = func[0]
+ for func_alias in func:
+ self.validate_identity(
+ f"""SELECT {func_alias}(date, INTERVAL '3' YEAR)""",
+ f"""SELECT {func_name}(date, INTERVAL '3' YEAR)""",
+ )
+
+ # 3-arg functions of type <func>(unit, value, date)
+ for func in (*datetime_funcs, ("DATE_DIFF", "DATEDIFF"), ("TIMESTAMP_SUB", "TIMESTAMPSUB")):
+ func_name = func[0]
+ for func_alias in func:
+ with self.subTest(f"Test 3-arg date-time function {func_alias}"):
+ self.validate_identity(
+ f"SELECT {func_alias}(SECOND, 1, bar)",
+ f"SELECT {func_name}(SECOND, 1, bar)",
+ )
+
+ def test_convert(self):
+ self.assertEqual(
+ convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')"
+ )
+
+ def test_grant(self):
+ self.validate_identity("GRANT SELECT(x, y) ON db.table TO john WITH GRANT OPTION")
+ self.validate_identity("GRANT INSERT(x, y) ON db.table TO john")
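
A minimal sketch of the ClickHouse TRY_CAST handling added above, assuming a sqlglot build with this change: TRY_CAST targets are wrapped in Nullable(...) for scalar types, while array, map and tuple targets are left unwrapped because those types cannot be Nullable in ClickHouse.

    import sqlglot

    # Scalar target: the generator wraps the type in Nullable(...).
    print(sqlglot.parse_one("TRY_CAST(x AS TEXT)").sql("clickhouse"))
    # expected per the tests above: CAST(x AS Nullable(String))

    # Container target: the Nullable wrapper is dropped, a plain CAST is emitted.
    print(sqlglot.parse_one("TRY_CAST(x AS ARRAY<INT>)").sql("clickhouse"))
    # expected: a plain CAST with no Nullable wrapper, e.g. CAST(x AS Array(Int32))
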
diff --git a/tests/dialects/test_databricks.py b/tests/dialects/test_databricks.py
index 9ef3b86..65e8d5d 100644
--- a/tests/dialects/test_databricks.py
+++ b/tests/dialects/test_databricks.py
@@ -1,4 +1,4 @@
-from sqlglot import transpile
+from sqlglot import exp, transpile
from sqlglot.errors import ParseError
from tests.dialects.test_dialect import Validator
@@ -7,6 +7,7 @@ class TestDatabricks(Validator):
dialect = "databricks"
def test_databricks(self):
+ self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT")
self.validate_identity("DESCRIBE HISTORY a.b")
self.validate_identity("DESCRIBE history.tbl")
self.validate_identity("CREATE TABLE t (a STRUCT<c: MAP<STRING, STRING>>)")
@@ -25,6 +26,7 @@ class TestDatabricks(Validator):
self.validate_identity("CREATE FUNCTION a AS b")
self.validate_identity("SELECT ${x} FROM ${y} WHERE ${z} > 1")
self.validate_identity("CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))")
+ self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)")
self.validate_identity(
"CREATE TABLE IF NOT EXISTS db.table (a TIMESTAMP, b BOOLEAN GENERATED ALWAYS AS (NOT a IS NULL)) USING DELTA"
)
@@ -37,22 +39,26 @@ class TestDatabricks(Validator):
self.validate_identity(
"SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"
)
-
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$"
)
-
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$"
)
-
- self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)")
self.validate_identity(
"TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', city LIKE 'LA')"
)
self.validate_identity(
"COPY INTO target FROM `s3://link` FILEFORMAT = AVRO VALIDATE = ALL FILES = ('file1', 'file2') FORMAT_OPTIONS ('opt1'='true', 'opt2'='test') COPY_OPTIONS ('mergeSchema'='true')"
)
+ self.validate_identity(
+ "DATE_DIFF(day, created_at, current_date())",
+ "DATEDIFF(DAY, created_at, CURRENT_DATE)",
+ ).args["unit"].assert_is(exp.Var)
+ self.validate_identity(
+ r'SELECT r"\\foo.bar\"',
+ r"SELECT '\\\\foo.bar\\'",
+ )
self.validate_all(
"CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
@@ -67,7 +73,6 @@ class TestDatabricks(Validator):
"teradata": "CREATE TABLE t1 AS (SELECT c FROM t2) WITH DATA",
},
)
-
self.validate_all(
"SELECT X'1A2B'",
read={
@@ -94,35 +99,55 @@ class TestDatabricks(Validator):
read="databricks",
)
+ self.validate_all(
+ "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a",
+ write={
+ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a",
+ "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS TABLE SELECT a",
+ },
+ )
+
+ self.validate_all(
+ "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a",
+ write={
+ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a",
+ "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS a",
+ },
+ )
+
# https://docs.databricks.com/sql/language-manual/functions/colonsign.html
def test_json(self):
+ self.validate_identity("SELECT c1:price, c1:price.foo, c1:price.bar[1]")
self.validate_identity(
- """SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:item[1].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:['price'] FROM VALUES('{ "price": 5 }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:item[*].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:item[1].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.item[1].price') FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT FROM_JSON(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT c1:item[*].price FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT GET_JSON_OBJECT(c1, '$.item[*].price') FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT INLINE(FROM_JSON(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
)
self.validate_identity(
- """SELECT from_json(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT FROM_JSON(GET_JSON_OBJECT(c1, '$.item[*].price'), 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT c1:['price'] FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
)
self.validate_identity(
- """SELECT inline(from_json(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
- """SELECT INLINE(FROM_JSON(GET_JSON_OBJECT(c1, '$.item[*]'), 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)""",
+ """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
+ """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
)
self.validate_identity(
- "SELECT c1 : price",
- "SELECT GET_JSON_OBJECT(c1, '$.price')",
+ """SELECT raw:`zip code`, raw:`fb:testid`, raw:store['bicycle'], raw:store["zip code"]""",
+ """SELECT raw:["zip code"], raw:["fb:testid"], raw:store.bicycle, raw:store["zip code"]""",
+ )
+ self.validate_all(
+ "SELECT col:`fr'uit`",
+ write={
+ "databricks": """SELECT col:["fr'uit"]""",
+ "postgres": "SELECT JSON_EXTRACT_PATH(col, 'fr''uit')",
+ },
)
def test_datediff(self):
@@ -238,3 +263,17 @@ class TestDatabricks(Validator):
"databricks": "WITH x AS (SELECT 1) SELECT * FROM x",
},
)
+
+ def test_streaming_tables(self):
+ self.validate_identity(
+ "CREATE STREAMING TABLE raw_data AS SELECT * FROM STREAM READ_FILES('abfss://container@storageAccount.dfs.core.windows.net/base/path')"
+ )
+ self.validate_identity(
+ "CREATE OR REFRESH STREAMING TABLE csv_data (id INT, ts TIMESTAMP, event STRING) AS SELECT * FROM STREAM READ_FILES('s3://bucket/path', format => 'csv', schema => 'id int, ts timestamp, event string')"
+ )
+
+ def test_grant(self):
+ self.validate_identity("GRANT CREATE ON SCHEMA my_schema TO `alf@melmak.et`")
+ self.validate_identity("GRANT SELECT ON TABLE sample_data TO `alf@melmak.et`")
+ self.validate_identity("GRANT ALL PRIVILEGES ON TABLE forecasts TO finance")
+ self.validate_identity("GRANT SELECT ON TABLE t TO `fab9e00e-ca35-11ec-9d64-0242ac120002`")
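# A short sketch of the Databricks colon-sign extraction covered by test_json above,
# built on parse_one/Expression.sql; the input and expected SQL are copied from the
# bracketed-key case in that test.
from sqlglot import parse_one

expr = parse_one("""SELECT c1:['price'] FROM VALUES ('{ "price": 5 }') AS T(c1)""", read="databricks")
assert expr.sql(dialect="databricks") == """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)"""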
diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py
index aaeb7b0..96ce600 100644
--- a/tests/dialects/test_dialect.py
+++ b/tests/dialects/test_dialect.py
@@ -20,7 +20,9 @@ class Validator(unittest.TestCase):
def parse_one(self, sql, **kwargs):
return parse_one(sql, read=self.dialect, **kwargs)
- def validate_identity(self, sql, write_sql=None, pretty=False, check_command_warning=False):
+ def validate_identity(
+ self, sql, write_sql=None, pretty=False, check_command_warning=False, identify=False
+ ):
if check_command_warning:
with self.assertLogs(parser_logger) as cm:
expression = self.parse_one(sql)
@@ -28,7 +30,9 @@ class Validator(unittest.TestCase):
else:
expression = self.parse_one(sql)
- self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty))
+ self.assertEqual(
+ write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty, identify=identify)
+ )
return expression
def validate_all(self, sql, read=None, write=None, pretty=False, identify=False):
@@ -102,14 +106,10 @@ class TestDialect(Validator):
lowercase_mysql = Dialect.get_or_raise("mysql, normalization_strategy = lowercase")
self.assertEqual(lowercase_mysql.normalization_strategy.value, "LOWERCASE")
- with self.assertRaises(ValueError) as cm:
+ with self.assertRaises(AttributeError) as cm:
Dialect.get_or_raise("mysql, normalization_strategy")
- self.assertEqual(
- str(cm.exception),
- "Invalid dialect format: 'mysql, normalization_strategy'. "
- "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'.",
- )
+ self.assertEqual(str(cm.exception), "'bool' object has no attribute 'upper'")
with self.assertRaises(ValueError) as cm:
Dialect.get_or_raise("myqsl")
@@ -121,6 +121,18 @@ class TestDialect(Validator):
self.assertEqual(str(cm.exception), "Unknown dialect 'asdfjasodiufjsd'.")
+ oracle_with_settings = Dialect.get_or_raise(
+ "oracle, normalization_strategy = lowercase, version = 19.5"
+ )
+ self.assertEqual(oracle_with_settings.normalization_strategy.value, "LOWERCASE")
+ self.assertEqual(oracle_with_settings.settings, {"version": "19.5"})
+
+ bool_settings = Dialect.get_or_raise("oracle, s1=TruE, s2=1, s3=FaLse, s4=0, s5=nonbool")
+ self.assertEqual(
+ bool_settings.settings,
+ {"s1": True, "s2": True, "s3": False, "s4": False, "s5": "nonbool"},
+ )
+
def test_compare_dialects(self):
bigquery_class = Dialect["bigquery"]
bigquery_object = BigQuery()
@@ -152,7 +164,7 @@ class TestDialect(Validator):
"CAST(a AS TEXT)",
write={
"bigquery": "CAST(a AS STRING)",
- "clickhouse": "CAST(a AS String)",
+ "clickhouse": "CAST(a AS Nullable(String))",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"materialize": "CAST(a AS TEXT)",
@@ -173,7 +185,7 @@ class TestDialect(Validator):
"CAST(a AS BINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
- "clickhouse": "CAST(a AS BINARY(4))",
+ "clickhouse": "CAST(a AS Nullable(BINARY(4)))",
"drill": "CAST(a AS VARBINARY(4))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
@@ -193,7 +205,7 @@ class TestDialect(Validator):
"CAST(a AS VARBINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
- "clickhouse": "CAST(a AS String)",
+ "clickhouse": "CAST(a AS Nullable(String))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
"mysql": "CAST(a AS VARBINARY(4))",
@@ -211,19 +223,19 @@ class TestDialect(Validator):
self.validate_all(
"CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
write={
- "clickhouse": "CAST(map('a', '1') AS Map(String, String))",
+ "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))",
},
)
self.validate_all(
"CAST(ARRAY(1, 2) AS ARRAY<TINYINT>)",
write={
- "clickhouse": "CAST([1, 2] AS Array(Int8))",
+ "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))",
},
)
self.validate_all(
- "CAST((1, 2) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
+ "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
write={
- "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))",
+ "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))",
},
)
self.validate_all(
@@ -320,19 +332,9 @@ class TestDialect(Validator):
"redshift": "CAST(a AS DOUBLE PRECISION)",
},
write={
- "duckdb": "CAST(a AS DOUBLE)",
- "drill": "CAST(a AS DOUBLE)",
- "postgres": "CAST(a AS DOUBLE PRECISION)",
- "redshift": "CAST(a AS DOUBLE PRECISION)",
- "doris": "CAST(a AS DOUBLE)",
- },
- )
-
- self.validate_all(
- "CAST(a AS DOUBLE)",
- write={
"bigquery": "CAST(a AS FLOAT64)",
- "clickhouse": "CAST(a AS Float64)",
+ "clickhouse": "CAST(a AS Nullable(Float64))",
+ "doris": "CAST(a AS DOUBLE)",
"drill": "CAST(a AS DOUBLE)",
"duckdb": "CAST(a AS DOUBLE)",
"materialize": "CAST(a AS DOUBLE PRECISION)",
@@ -584,7 +586,7 @@ class TestDialect(Validator):
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)",
"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')",
"drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')",
- "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')",
+ "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')",
"spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')",
},
)
@@ -620,7 +622,7 @@ class TestDialect(Validator):
write={
"duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))",
"hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')",
- "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(CAST('2020-01-01' AS VARCHAR), 'yyyy-MM-dd')))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d'), 'yyyy-MM-dd')))",
"starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
"doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')",
},
@@ -639,8 +641,18 @@ class TestDialect(Validator):
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01')",
write={
- "drill": "CAST('2020-01-01' AS TIMESTAMP)",
+ "bigquery": "CAST('2020-01-01' AS DATETIME)",
+ "databricks": "CAST('2020-01-01' AS TIMESTAMP)",
"duckdb": "CAST('2020-01-01' AS TIMESTAMP)",
+ "tsql": "CAST('2020-01-01' AS DATETIME2)",
+ "mysql": "CAST('2020-01-01' AS DATETIME)",
+ "postgres": "CAST('2020-01-01' AS TIMESTAMP)",
+ "redshift": "CAST('2020-01-01' AS TIMESTAMP)",
+ "snowflake": "CAST('2020-01-01' AS TIMESTAMP)",
+ "spark": "CAST('2020-01-01' AS TIMESTAMP)",
+ "trino": "CAST('2020-01-01' AS TIMESTAMP)",
+ "clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))",
+ "drill": "CAST('2020-01-01' AS TIMESTAMP)",
"hive": "CAST('2020-01-01' AS TIMESTAMP)",
"presto": "CAST('2020-01-01' AS TIMESTAMP)",
"sqlite": "'2020-01-01'",
@@ -648,6 +660,64 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')",
+ write={
+ "mysql": "CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))",
+ "trino": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP(6))",
+ "presto": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14.123-08:00', 'America/Los_Angeles')",
+ write={
+ "mysql": "TIMESTAMP('2020-01-01 12:13:14.123-08:00')",
+ "trino": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP(3) WITH TIME ZONE)",
+ "presto": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')",
+ write={
+ "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
+ "mysql": "TIMESTAMP('2020-01-01 12:13:14-08:00')",
+ "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
+ "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))",
+ "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
+ "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
+ "sqlite": "'2020-01-01 12:13:14-08:00'",
+ "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)",
+ },
+ )
+ self.validate_all(
+ "TIME_STR_TO_TIME(col, 'America/Los_Angeles')",
+ write={
+ "bigquery": "CAST(col AS TIMESTAMP)",
+ "databricks": "CAST(col AS TIMESTAMP)",
+ "duckdb": "CAST(col AS TIMESTAMPTZ)",
+ "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
+ "mysql": "TIMESTAMP(col)",
+ "postgres": "CAST(col AS TIMESTAMPTZ)",
+ "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "snowflake": "CAST(col AS TIMESTAMPTZ)",
+ "spark": "CAST(col AS TIMESTAMP)",
+ "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))",
+ "drill": "CAST(col AS TIMESTAMP)",
+ "hive": "CAST(col AS TIMESTAMP)",
+ "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
+ "sqlite": "col",
+ "doris": "CAST(col AS DATETIME)",
+ },
+ )
+ self.validate_all(
"TIME_STR_TO_UNIX('2020-01-01')",
write={
"duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))",
@@ -673,6 +743,13 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
+ write={
+ "redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')",
+ "tsql": "FORMAT(a, 'yyyy-MM-dd HH:mm:ss.ffffff')",
+ },
+ )
+ self.validate_all(
"TIME_TO_TIME_STR(x)",
write={
"drill": "CAST(x AS VARCHAR)",
@@ -1150,13 +1227,19 @@ class TestDialect(Validator):
write={
"bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
- "oracle": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
"hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
},
)
+ order_by_all_sql = "SELECT * FROM t ORDER BY ALL"
+ self.validate_identity(order_by_all_sql).find(exp.Ordered).this.assert_is(exp.Column)
+
+ for dialect in ("duckdb", "spark", "databricks"):
+ with self.subTest(f"Testing ORDER BY ALL in {dialect}"):
+ parse_one(order_by_all_sql, read=dialect).find(exp.Ordered).this.assert_is(exp.Var)
+
def test_json(self):
self.validate_all(
"""JSON_EXTRACT(x, '$["a b"]')""",
@@ -1329,6 +1412,13 @@ class TestDialect(Validator):
},
)
+ for dialect in ("duckdb", "starrocks"):
+ with self.subTest(f"Generating json extraction with digit-prefixed key ({dialect})"):
+ self.assertEqual(
+ parse_one("""select '{"0": "v"}' -> '0'""", read=dialect).sql(dialect=dialect),
+ """SELECT '{"0": "v"}' -> '0'""",
+ )
+
def test_cross_join(self):
self.validate_all(
"SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)",
@@ -1343,7 +1433,7 @@ class TestDialect(Validator):
write={
"drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
"presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
- "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b",
+ "spark": "SELECT a, b FROM x LATERAL VIEW INLINE(ARRAYS_ZIP(y, z)) t AS a, b",
},
)
self.validate_all(
@@ -1409,12 +1499,14 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
},
write={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
@@ -1424,12 +1516,14 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
},
write={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
@@ -1448,6 +1542,7 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
@@ -1457,6 +1552,7 @@ class TestDialect(Validator):
"SELECT * FROM a INTERSECT ALL SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a INTERSECT SELECT * FROM b",
"duckdb": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
"presto": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
"spark": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
@@ -1466,6 +1562,7 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
write={
"bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
@@ -1475,6 +1572,7 @@ class TestDialect(Validator):
"SELECT * FROM a EXCEPT ALL SELECT * FROM b",
read={
"bigquery": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
+ "clickhouse": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"duckdb": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"presto": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
"spark": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
@@ -1690,6 +1788,13 @@ class TestDialect(Validator):
},
)
self.validate_all(
+ "FILTER(the_array, x -> x > 0)",
+ write={
+ "presto": "FILTER(the_array, x -> x > 0)",
+ "starrocks": "ARRAY_FILTER(the_array, x -> x > 0)",
+ },
+ )
+ self.validate_all(
"a / b",
write={
"bigquery": "a / b",
@@ -2101,13 +2206,24 @@ SELECT
},
)
+        # needs to preserve the target alias in the WHEN condition but not in the THEN clause
+ self.validate_all(
+ """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a
+ WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO'
+ WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""",
+ write={
+ "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
+ "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
+ },
+ )
+
def test_substring(self):
self.validate_all(
"SUBSTR('123456', 2, 3)",
write={
- "bigquery": "SUBSTR('123456', 2, 3)",
+ "bigquery": "SUBSTRING('123456', 2, 3)",
"oracle": "SUBSTR('123456', 2, 3)",
- "postgres": "SUBSTR('123456', 2, 3)",
+ "postgres": "SUBSTRING('123456' FROM 2 FOR 3)",
},
)
self.validate_all(
@@ -2260,11 +2376,11 @@ SELECT
write={
"duckdb": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
"snowflake": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
- "clickhouse": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "clickhouse": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1",
"mysql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
"oracle": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1",
"postgres": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
- "tsql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "tsql": "SELECT * FROM (SELECT *, COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
},
)
self.validate_all(
@@ -2272,11 +2388,11 @@ SELECT
write={
"duckdb": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
"snowflake": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
- "clickhouse": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1',
+ "clickhouse": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1',
"mysql": "SELECT `user id`, some_id, other_id, `2 nd id` FROM (SELECT `user id`, some_id, 1 AS other_id, 2 AS `2 nd id`, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
"oracle": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1',
"postgres": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1',
- "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
+ "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1",
},
)
@@ -2559,3 +2675,219 @@ FROM subquery2""",
"""SELECT partition.d FROM t PARTITION (d)""",
"""SELECT partition.d FROM t AS PARTITION(d)""",
)
+
+ def test_string_functions(self):
+ for pad_func in ("LPAD", "RPAD"):
+ ch_alias = "LEFTPAD" if pad_func == "LPAD" else "RIGHTPAD"
+ for fill_pattern in ("", ", ' '"):
+ with self.subTest(f"Testing {pad_func}() with pattern {fill_pattern}"):
+ self.validate_all(
+ f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ read={
+ "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "clickhouse": f"SELECT {ch_alias}('bar', 5{fill_pattern})",
+ },
+ write={
+ "": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "clickhouse": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})",
+ "duckdb": f"SELECT {pad_func}('bar', 5, ' ')",
+ "mysql": f"SELECT {pad_func}('bar', 5, ' ')",
+ "hive": f"SELECT {pad_func}('bar', 5, ' ')",
+ "spark2": f"SELECT {pad_func}('bar', 5, ' ')",
+ "presto": f"SELECT {pad_func}('bar', 5, ' ')",
+ "trino": f"SELECT {pad_func}('bar', 5, ' ')",
+ },
+ )
+
+ def test_generate_date_array(self):
+ self.validate_all(
+ "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
+ write={
+ "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))",
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
+ "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series",
+ "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
+ "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))",
+ "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
+ "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates",
+ },
+ )
+ self.validate_all(
+ "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
+ },
+ )
+ self.validate_all(
+ "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)",
+ write={
+ "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
+ "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
+ "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)",
+ "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates",
+ },
+ )
+
+ def test_set_operation_specifiers(self):
+ self.validate_all(
+ "SELECT 1 EXCEPT ALL SELECT 1",
+ write={
+ "": "SELECT 1 EXCEPT ALL SELECT 1",
+ "bigquery": UnsupportedError,
+ "clickhouse": "SELECT 1 EXCEPT SELECT 1",
+ "databricks": "SELECT 1 EXCEPT ALL SELECT 1",
+ "duckdb": "SELECT 1 EXCEPT ALL SELECT 1",
+ "mysql": "SELECT 1 EXCEPT ALL SELECT 1",
+ "oracle": "SELECT 1 EXCEPT ALL SELECT 1",
+ "postgres": "SELECT 1 EXCEPT ALL SELECT 1",
+ "presto": UnsupportedError,
+ "redshift": UnsupportedError,
+ "snowflake": UnsupportedError,
+ "spark": "SELECT 1 EXCEPT ALL SELECT 1",
+ "sqlite": UnsupportedError,
+ "starrocks": UnsupportedError,
+ "trino": UnsupportedError,
+ "tsql": UnsupportedError,
+ },
+ )
+
+ def test_normalize(self):
+ for form in ("", ", nfkc"):
+ with self.subTest(f"Testing NORMALIZE('str'{form}) roundtrip"):
+ self.validate_all(
+ f"SELECT NORMALIZE('str'{form})",
+ read={
+ "presto": f"SELECT NORMALIZE('str'{form})",
+ "trino": f"SELECT NORMALIZE('str'{form})",
+ "bigquery": f"SELECT NORMALIZE('str'{form})",
+ },
+ write={
+ "presto": f"SELECT NORMALIZE('str'{form})",
+ "trino": f"SELECT NORMALIZE('str'{form})",
+ "bigquery": f"SELECT NORMALIZE('str'{form})",
+ },
+ )
+
+ self.assertIsInstance(parse_one("NORMALIZE('str', NFD)").args.get("form"), exp.Var)
+
+ def test_coalesce(self):
+ """
+        Validate that "expressions" is a list for all exp.Coalesce instances; this is important,
+        as some optimizer rules are coalesce-specific and will iterate over "expressions".
+ """
+
+ # Check the 2-arg aliases
+ for func in ("COALESCE", "IFNULL", "NVL"):
+ self.assertIsInstance(self.parse_one(f"{func}(1, 2)").expressions, list)
+
+ # Check the varlen case
+ coalesce = self.parse_one("COALESCE(x, y, z)")
+ self.assertIsInstance(coalesce.expressions, list)
+ self.assertIsNone(coalesce.args.get("is_nvl"))
+
+ # Check Oracle's NVL which is decoupled from COALESCE
+ oracle_nvl = parse_one("NVL(x, y)", read="oracle")
+ self.assertIsInstance(oracle_nvl.expressions, list)
+ self.assertTrue(oracle_nvl.args.get("is_nvl"))
+
+ # Check T-SQL's ISNULL which is parsed into exp.Coalesce
+ self.assertIsInstance(parse_one("ISNULL(x, y)", read="tsql").expressions, list)
+
+ def test_trim(self):
+ self.validate_all(
+ "TRIM('abc', 'a')",
+ read={
+ "bigquery": "TRIM('abc', 'a')",
+ "snowflake": "TRIM('abc', 'a')",
+ },
+ write={
+ "bigquery": "TRIM('abc', 'a')",
+ "snowflake": "TRIM('abc', 'a')",
+ },
+ )
+
+ self.validate_all(
+ "LTRIM('Hello World', 'H')",
+ read={
+ "oracle": "LTRIM('Hello World', 'H')",
+ "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')",
+ "snowflake": "LTRIM('Hello World', 'H')",
+ "bigquery": "LTRIM('Hello World', 'H')",
+ "": "LTRIM('Hello World', 'H')",
+ },
+ write={
+ "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')",
+ "oracle": "LTRIM('Hello World', 'H')",
+ "snowflake": "LTRIM('Hello World', 'H')",
+ "bigquery": "LTRIM('Hello World', 'H')",
+ },
+ )
+
+ self.validate_all(
+ "RTRIM('Hello World', 'd')",
+ read={
+ "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')",
+ "oracle": "RTRIM('Hello World', 'd')",
+ "snowflake": "RTRIM('Hello World', 'd')",
+ "bigquery": "RTRIM('Hello World', 'd')",
+ "": "RTRIM('Hello World', 'd')",
+ },
+ write={
+ "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')",
+ "oracle": "RTRIM('Hello World', 'd')",
+ "snowflake": "RTRIM('Hello World', 'd')",
+ "bigquery": "RTRIM('Hello World', 'd')",
+ },
+ )
+
+ def test_uuid(self):
+ self.validate_all(
+ "UUID()",
+ read={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "mysql": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ "snowflake": "UUID_STRING()",
+ },
+ write={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "mysql": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ "snowflake": "UUID_STRING()",
+ },
+ )
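# A hedged sketch of the invariant described in the test_coalesce docstring above:
# the two-argument aliases all parse (in the default dialect) into a node whose
# "expressions" arg is a list, which the coalesce-specific optimizer rules iterate over.
from sqlglot import parse_one

for func in ("COALESCE", "IFNULL", "NVL"):
    assert isinstance(parse_one(f"{func}(1, 2)").expressions, list)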
diff --git a/tests/dialects/test_doris.py b/tests/dialects/test_doris.py
index 8180d05..99076ba 100644
--- a/tests/dialects/test_doris.py
+++ b/tests/dialects/test_doris.py
@@ -56,6 +56,34 @@ class TestDoris(Validator):
"postgres": "SELECT STRING_AGG('aa', ',')",
},
)
+ self.validate_all(
+ "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LAG(1) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LAG(1, 2) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LEAD(1) OVER (ORDER BY 1)",
+ },
+ )
+ self.validate_all(
+ "SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)",
+ read={
+ "doris": "SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)",
+ "postgres": "SELECT LEAD(1, 2) OVER (ORDER BY 1)",
+ },
+ )
def test_identity(self):
self.validate_identity("COALECSE(a, b, c, d)")
diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py
index 2bde478..e4788ec 100644
--- a/tests/dialects/test_duckdb.py
+++ b/tests/dialects/test_duckdb.py
@@ -18,36 +18,35 @@ class TestDuckDB(Validator):
"WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1",
)
- self.validate_all(
- "SELECT straight_join",
- write={
- "duckdb": "SELECT straight_join",
- "mysql": "SELECT `straight_join`",
- },
+ struct_array_type = exp.maybe_parse(
+ "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]",
+ into=exp.DataType,
+ dialect="duckdb",
+ )
+ self.assertEqual(
+ struct_array_type.sql("duckdb"),
+ "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]",
)
+
self.validate_all(
- "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
- read={
- "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
- "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
+ "CAST(x AS UUID)",
+ write={
+ "bigquery": "CAST(x AS STRING)",
+ "duckdb": "CAST(x AS UUID)",
},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""",
read={
- "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
- "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
+ "duckdb": """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""",
+ "snowflake": """SELECT TRY_PARSE_JSON('{"x: 1}')""",
},
)
self.validate_all(
- "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
- read={
- "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
- "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
- },
+ "SELECT straight_join",
write={
- "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
- "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
+ "duckdb": "SELECT straight_join",
+ "mysql": "SELECT `straight_join`",
},
)
self.validate_all(
@@ -103,7 +102,9 @@ class TestDuckDB(Validator):
self.validate_all(
"CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)",
- read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"},
+ read={
+ "bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)",
+ },
)
self.validate_identity("SELECT 1 WHERE x > $1")
self.validate_identity("SELECT 1 WHERE x > $name")
@@ -119,13 +120,17 @@ class TestDuckDB(Validator):
)
self.validate_all(
- "{'a': 1, 'b': '2'}", write={"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))"}
+ "{'a': 1, 'b': '2'}",
+ write={
+ "presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))",
+ },
)
self.validate_all(
"struct_pack(a := 1, b := 2)",
- write={"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))"},
+ write={
+ "presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
+ },
)
-
self.validate_all(
"struct_pack(a := 1, b := x)",
write={
@@ -243,10 +248,6 @@ class TestDuckDB(Validator):
},
)
- self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
- self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
- self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
-
# https://github.com/duckdb/duckdb/releases/tag/v0.8.0
self.assertEqual(
parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b"
@@ -255,6 +256,11 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
+ self.validate_identity("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))")
+ self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
+ self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
+ self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
+ self.validate_identity("SELECT * FROM GLOB(x)")
self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])")
self.validate_identity("SELECT MAP {'x': 1}")
self.validate_identity("SELECT (MAP {'x': 1})['x']")
@@ -286,10 +292,39 @@ class TestDuckDB(Validator):
self.validate_identity("x -> '$.family'")
self.validate_identity("CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))")
self.validate_identity("SELECT * FROM foo WHERE bar > $baz AND bla = $bob")
+ self.validate_identity("SUMMARIZE tbl").assert_is(exp.Summarize)
+ self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize)
+ self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)")
+ self.validate_identity("UNION_VALUE(k1 := 1)").find(exp.PropertyEQ).this.assert_is(
+ exp.Identifier
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY GROUPING SETS (species), GROUPING SETS (island)"
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY CUBE (species), CUBE (island)"
+ )
+ self.validate_identity(
+ "SELECT species, island, COUNT(*) FROM t GROUP BY ROLLUP (species), ROLLUP (island)"
+ )
+ self.validate_identity(
+ "SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'"
+ ).assert_is(exp.Summarize)
self.validate_identity(
"SELECT * FROM x LEFT JOIN UNNEST(y)", "SELECT * FROM x LEFT JOIN UNNEST(y) ON TRUE"
)
self.validate_identity(
+ """SELECT '{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""",
+ )
+ self.validate_identity(
+ """SELECT JSON_EXTRACT_STRING('{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }', ['$.family', '$.species'])""",
+ """SELECT '{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""",
+ )
+ self.validate_identity(
+ "SELECT col FROM t WHERE JSON_EXTRACT_STRING(col, '$.id') NOT IN ('b')",
+ "SELECT col FROM t WHERE NOT (col ->> '$.id') IN ('b')",
+ )
+ self.validate_identity(
"SELECT a, LOGICAL_OR(b) FROM foo GROUP BY a",
"SELECT a, BOOL_OR(b) FROM foo GROUP BY a",
)
@@ -302,6 +337,14 @@ class TestDuckDB(Validator):
"SELECT (c -> '$.k1') = 'v1'",
)
self.validate_identity(
+ "SELECT JSON_EXTRACT(c, '$[*].id')[0:2]",
+ "SELECT (c -> '$[*].id')[0 : 2]",
+ )
+ self.validate_identity(
+ "SELECT JSON_EXTRACT_STRING(c, '$[*].id')[0:2]",
+ "SELECT (c ->> '$[*].id')[0 : 2]",
+ )
+ self.validate_identity(
"""SELECT '{"foo": [1, 2, 3]}' -> 'foo' -> 0""",
"""SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""",
)
@@ -354,6 +397,10 @@ class TestDuckDB(Validator):
self.validate_identity(
"SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias"
)
+ self.validate_identity(
+ # QUALIFY comes after WINDOW
+ "SELECT schema_name, function_name, ROW_NUMBER() OVER my_window AS function_rank FROM DUCKDB_FUNCTIONS() WINDOW my_window AS (PARTITION BY schema_name ORDER BY function_name) QUALIFY ROW_NUMBER() OVER my_window < 3"
+ )
self.validate_identity("DATE_SUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous)
self.validate_identity("DATESUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous)
@@ -488,8 +535,8 @@ class TestDuckDB(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
},
)
self.validate_all(
@@ -497,8 +544,8 @@ class TestDuckDB(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a')",
"presto": "SPLIT(x, 'a')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))",
},
)
self.validate_all(
@@ -677,11 +724,11 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
- "SELECT CAST('2020-05-06' AS DATE) - INTERVAL 5 DAY",
+ "SELECT CAST('2020-05-06' AS DATE) - INTERVAL '5' DAY",
read={"bigquery": "SELECT DATE_SUB(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_all(
- "SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY",
+ "SELECT CAST('2020-05-06' AS DATE) + INTERVAL '5' DAY",
read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_identity(
@@ -786,6 +833,32 @@ class TestDuckDB(Validator):
},
)
+ self.validate_identity("SELECT LENGTH(foo)")
+ self.validate_identity("SELECT ARRAY[1, 2, 3]", "SELECT [1, 2, 3]")
+
+ self.validate_identity("SELECT * FROM (DESCRIBE t)")
+
+ self.validate_identity("SELECT UNNEST([*COLUMNS('alias_.*')]) AS column_name")
+ self.validate_identity(
+ "SELECT COALESCE(*COLUMNS(*)) FROM (SELECT NULL, 2, 3) AS t(a, b, c)"
+ )
+ self.validate_identity(
+ "SELECT id, STRUCT_PACK(*COLUMNS('m\\d')) AS measurements FROM many_measurements",
+ """SELECT id, {'_0': *COLUMNS('m\\d')} AS measurements FROM many_measurements""",
+ )
+ self.validate_identity("SELECT COLUMNS(c -> c LIKE '%num%') FROM numbers")
+ self.validate_identity(
+ "SELECT MIN(COLUMNS(* REPLACE (number + id AS number))), COUNT(COLUMNS(* EXCLUDE (number))) FROM numbers"
+ )
+ self.validate_identity("SELECT COLUMNS(*) + COLUMNS(*) FROM numbers")
+ self.validate_identity("SELECT COLUMNS('(id|numbers?)') FROM numbers")
+ self.validate_identity(
+ "SELECT COALESCE(COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)"
+ )
+ self.validate_identity(
+ "SELECT COALESCE(*COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)"
+ )
+
def test_array_index(self):
with self.assertLogs(helper_logger) as cm:
self.validate_all(
@@ -818,10 +891,10 @@ class TestDuckDB(Validator):
self.assertEqual(
cm.output,
[
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (-1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
],
)
@@ -847,7 +920,7 @@ class TestDuckDB(Validator):
read={"bigquery": "SELECT DATE(PARSE_DATE('%m/%d/%Y', '05/06/2020'))"},
)
self.validate_all(
- "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (-1) DAY",
+ "SELECT CAST('2020-01-01' AS DATE) + INTERVAL '-1' DAY",
read={"mysql": "SELECT DATE '2020-01-01' + INTERVAL -1 DAY"},
)
self.validate_all(
@@ -855,7 +928,7 @@ class TestDuckDB(Validator):
write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"},
)
self.validate_all(
- "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - ((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7) % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
+ "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - ((ISODOW(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7) % 7) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
@@ -876,12 +949,12 @@ class TestDuckDB(Validator):
"EPOCH_MS(x)",
write={
"bigquery": "TIMESTAMP_MILLIS(x)",
+ "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
"duckdb": "EPOCH_MS(x)",
+ "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
+ "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
"spark": "TIMESTAMP_MILLIS(x)",
- "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Int64))",
- "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
- "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
},
)
self.validate_all(
@@ -925,7 +998,7 @@ class TestDuckDB(Validator):
self.validate_all(
"STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
write={
- "bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)",
+ "bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)",
"duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
"presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)",
@@ -984,12 +1057,20 @@ class TestDuckDB(Validator):
"duckdb": "SELECT * FROM example TABLESAMPLE RESERVOIR (3 ROWS) REPEATABLE (82)",
},
)
+ self.validate_all(
+ "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE (2 ROWS)",
+ write={
+ "duckdb": "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE RESERVOIR (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE RESERVOIR (2 ROWS)",
+ "spark": "SELECT * FROM (SELECT * FROM t) TABLESAMPLE (1 ROWS) AS t1, (SELECT * FROM t) TABLESAMPLE (2 ROWS) AS t2",
+ },
+ )
def test_array(self):
self.validate_identity("ARRAY(SELECT id FROM t)")
self.validate_identity("ARRAY((SELECT id FROM t))")
def test_cast(self):
+ self.validate_identity("x::int[3]", "CAST(x AS INT[3])")
self.validate_identity("CAST(x AS REAL)")
self.validate_identity("CAST(x AS UINTEGER)")
self.validate_identity("CAST(x AS UBIGINT)")
@@ -1029,14 +1110,53 @@ class TestDuckDB(Validator):
)
self.validate_identity(
"CAST([[STRUCT_PACK(a := 1)]] AS STRUCT(a BIGINT)[][])",
- "CAST([[{'a': 1}]] AS STRUCT(a BIGINT)[][])",
+ "CAST([[ROW(1)]] AS STRUCT(a BIGINT)[][])",
)
self.validate_identity(
"CAST([STRUCT_PACK(a := 1)] AS STRUCT(a BIGINT)[])",
- "CAST([{'a': 1}] AS STRUCT(a BIGINT)[])",
+ "CAST([ROW(1)] AS STRUCT(a BIGINT)[])",
+ )
+ self.validate_identity(
+ "STRUCT_PACK(a := 'b')::json",
+ "CAST({'a': 'b'} AS JSON)",
+ )
+ self.validate_identity(
+ "STRUCT_PACK(a := 'b')::STRUCT(a TEXT)",
+ "CAST(ROW('b') AS STRUCT(a TEXT))",
)
self.validate_all(
+ "CAST(x AS TIME)",
+ read={
+ "duckdb": "CAST(x AS TIME)",
+ "presto": "CAST(x AS TIME(6))",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
+ read={
+ "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
+ "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ read={
+ "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
+ "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
+ },
+ )
+ self.validate_all(
+ "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
+ read={
+ "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
+ },
+ write={
+ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
+ "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
+ },
+ )
+ self.validate_all(
"CAST(x AS VARCHAR(5))",
write={
"duckdb": "CAST(x AS TEXT)",
@@ -1116,6 +1236,12 @@ class TestDuckDB(Validator):
},
)
+ self.validate_identity("SELECT x::INT[3][3]", "SELECT CAST(x AS INT[3][3])")
+ self.validate_identity(
+ """SELECT ARRAY[[[1]]]::INT[1][1][1]""",
+ """SELECT CAST([[[1]]] AS INT[1][1][1])""",
+ )
+
def test_encode_decode(self):
self.validate_all(
"ENCODE(x)",
@@ -1194,3 +1320,20 @@ class TestDuckDB(Validator):
read={"bigquery": "SELECT @foo"},
write={"bigquery": "SELECT @foo", "duckdb": "SELECT $foo"},
)
+
+ def test_ignore_nulls(self):
+ # Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM)
+ from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS
+
+ agg_funcs = (exp.Sum, exp.Max, exp.Min)
+
+ for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs:
+ func = func_type(this=exp.to_identifier("col"))
+ ignore_null = exp.IgnoreNulls(this=func)
+ windowed_ignore_null = exp.Window(this=ignore_null)
+
+ if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS:
+ self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
+ else:
+ self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
+ self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
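# A small sketch mirroring the expression construction in test_ignore_nulls above:
# the duckdb generator keeps IGNORE NULLS for window functions such as LEAD but
# drops it for aggregates such as SUM.
from sqlglot import exp

lead = exp.Window(this=exp.IgnoreNulls(this=exp.Lead(this=exp.to_identifier("col"))))
total = exp.Window(this=exp.IgnoreNulls(this=exp.Sum(this=exp.to_identifier("col"))))
assert "IGNORE NULLS" in lead.sql("duckdb")
assert "IGNORE NULLS" not in total.sql("duckdb")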
diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py
index 0311336..136ea60 100644
--- a/tests/dialects/test_hive.py
+++ b/tests/dialects/test_hive.py
@@ -171,6 +171,16 @@ class TestHive(Validator):
self.validate_identity(
"""CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
)
+ self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2")
+ self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2")
+ self.validate_identity(
+ "ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1"
+ )
+ self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2")
+ self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')")
+ self.validate_identity(
+ "ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True
+ )
def test_lateral_view(self):
self.validate_all(
@@ -372,7 +382,7 @@ class TestHive(Validator):
"UNIX_TIMESTAMP(x)",
write={
"duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
- "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))",
+ "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))",
"hive": "UNIX_TIMESTAMP(x)",
"spark": "UNIX_TIMESTAMP(x)",
"": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
@@ -402,6 +412,7 @@ class TestHive(Validator):
)
def test_hive(self):
+ self.validate_identity("SELECT * FROM t WHERE col IN ('stream')")
self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")
self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)")
@@ -705,8 +716,8 @@ class TestHive(Validator):
"presto": "ARRAY_AGG(x)",
},
write={
- "duckdb": "ARRAY_AGG(x)",
- "presto": "ARRAY_AGG(x)",
+ "duckdb": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
+ "presto": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
"hive": "COLLECT_LIST(x)",
"spark": "COLLECT_LIST(x)",
},
@@ -754,6 +765,24 @@ class TestHive(Validator):
"presto": "SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP)) AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
},
)
+ self.validate_all(
+ "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ read={
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ },
+ write={
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
+ },
+ )
def test_escapes(self) -> None:
self.validate_identity("'\n'", "'\\n'")
diff --git a/tests/dialects/test_mysql.py b/tests/dialects/test_mysql.py
index 280ebbf..835ee7c 100644
--- a/tests/dialects/test_mysql.py
+++ b/tests/dialects/test_mysql.py
@@ -1,3 +1,6 @@
+import unittest
+import sys
+
from sqlglot import expressions as exp
from sqlglot.dialects.mysql import MySQL
from tests.dialects.test_dialect import Validator
@@ -24,6 +27,21 @@ class TestMySQL(Validator):
self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)")
self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)")
self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT")
+ self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
+ self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
+ self.validate_identity(
+ "ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True
+ )
+ self.validate_identity(
+ "INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b"
+ )
self.validate_identity(
"CREATE TABLE `oauth_consumer` (`key` VARCHAR(32) NOT NULL, UNIQUE `OAUTH_CONSUMER_KEY` (`key`))"
)
@@ -64,6 +82,10 @@ class TestMySQL(Validator):
"CREATE OR REPLACE VIEW my_view AS SELECT column1 AS `boo`, column2 AS `foo` FROM my_table WHERE column3 = 'some_value' UNION SELECT q.* FROM fruits_table, JSON_TABLE(Fruits, '$[*]' COLUMNS(id VARCHAR(255) PATH '$.$id', value VARCHAR(255) PATH '$.value')) AS q",
)
self.validate_identity(
+ "CREATE TABLE t (name VARCHAR)",
+ "CREATE TABLE t (name TEXT)",
+ )
+ self.validate_identity(
"ALTER TABLE t ADD KEY `i` (`c`)",
"ALTER TABLE t ADD INDEX `i` (`c`)",
)
@@ -117,6 +139,7 @@ class TestMySQL(Validator):
)
def test_identity(self):
+ self.validate_identity("SELECT CAST(COALESCE(`id`, 'NULL') AS CHAR CHARACTER SET binary)")
self.validate_identity("SELECT e.* FROM e STRAIGHT_JOIN p ON e.x = p.y")
self.validate_identity("ALTER TABLE test_table ALTER COLUMN test_column SET DEFAULT 1")
self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d %H:%i:00.0000')")
@@ -157,6 +180,10 @@ class TestMySQL(Validator):
"REPLACE INTO table SELECT id FROM table2 WHERE cnt > 100", check_command_warning=True
)
self.validate_identity(
+ "CAST(x AS VARCHAR)",
+ "CAST(x AS CHAR)",
+ )
+ self.validate_identity(
"""SELECT * FROM foo WHERE 3 MEMBER OF(info->'$.value')""",
"""SELECT * FROM foo WHERE 3 MEMBER OF(JSON_EXTRACT(info, '$.value'))""",
)
@@ -222,7 +249,7 @@ class TestMySQL(Validator):
"SET @@GLOBAL.sort_buffer_size = 1000000, @@LOCAL.sort_buffer_size = 1000000"
)
self.validate_identity("INTERVAL '1' YEAR")
- self.validate_identity("DATE_ADD(x, INTERVAL 1 YEAR)")
+ self.validate_identity("DATE_ADD(x, INTERVAL '1' YEAR)")
self.validate_identity("CHAR(0)")
self.validate_identity("CHAR(77, 121, 83, 81, '76')")
self.validate_identity("CHAR(77, 77.3, '77.3' USING utf8mb4)")
@@ -520,9 +547,16 @@ class TestMySQL(Validator):
},
)
self.validate_all(
- "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
+ "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')",
+ write={
+ "mysql": "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')",
+ "snowflake": "SELECT TO_CHAR(CAST('2024-08-22 14:53:12' AS TIMESTAMP), 'DY')",
+ },
+ )
+ self.validate_all(
+ "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')",
write={
- "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
+ "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')",
"snowflake": "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMP), 'DY mmmm yyyy')",
},
)
@@ -536,7 +570,7 @@ class TestMySQL(Validator):
self.validate_all(
"SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')",
write={
- "mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %W %d %m %b')",
+ "mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')",
"snowflake": "SELECT TO_CHAR(CAST('1900-10-04 22:23:00' AS TIMESTAMP), 'DD yy DY DD mm mon')",
},
)
@@ -621,6 +655,53 @@ class TestMySQL(Validator):
},
)
+ # No timezone: make sure DATETIME captures the correct precision
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123456+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.123456+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.123+00:00' AS DATETIME(3))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15+00:00' AS DATETIME)",
+ )
+
+ # With timezone, make sure the TIMESTAMP constructor is used
+ # also, TIMESTAMP doesn't have the subsecond precision truncation issue that DATETIME does, so we don't need TIMESTAMP(6)
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
+ write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
+ write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
+ )
+
+ @unittest.skipUnless(
+ sys.version_info >= (3, 11),
+ "Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds",
+ )
+ def test_mysql_time_python311(self):
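+ # Before Python 3.11, datetime.fromisoformat() rejected fractional seconds that were not exactly 3 or 6 digits long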
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12345+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.12345+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1234+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.1234+00:00' AS DATETIME(6))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.12+00:00' AS DATETIME(3))",
+ )
+ self.validate_identity(
+ "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1+00:00')",
+ write_sql="SELECT CAST('2023-01-01 13:14:15.1+00:00' AS DATETIME(3))",
+ )
+
def test_mysql(self):
self.validate_all(
"SELECT CONCAT('11', '22')",
@@ -1176,3 +1257,39 @@ COMMENT='客户账户表'"""
"mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})",
},
)
+
+ def test_at_time_zone(self):
+ with self.assertLogs() as cm:
+ # Check that AT TIME ZONE doesn't discard the column name and that a warning is raised
+ self.validate_identity(
+ "SELECT foo AT TIME ZONE 'UTC'",
+ write_sql="SELECT foo",
+ )
+ assert "AT TIME ZONE is not supported" in cm.output[0]
+
+ def test_json_value(self):
+ json_doc = """'{"item": "shoes", "price": "49.95"}'"""
+ self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, '$.price')""")
+ self.validate_identity(
+ f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2))"""
+ )
+
+ for on_option in ("NULL", "ERROR", "DEFAULT 1"):
+ self.validate_identity(
+ f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price"""
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT 'role1', 'role2' TO 'user1'@'localhost', 'user2'@'localhost'",
+ "GRANT SELECT ON world.* TO 'role3'",
+ "GRANT SELECT ON db2.invoice TO 'jeffrey'@'localhost'",
+ "GRANT INSERT ON `d%`.* TO u",
+ "GRANT ALL ON test.* TO ''@'localhost'",
+ "GRANT SELECT (col1), INSERT (col1, col2) ON mydb.mytbl TO 'someuser'@'somehost'",
+ "GRANT SELECT, INSERT, UPDATE ON *.* TO u2",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing MySQL's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
diff --git a/tests/dialects/test_oracle.py b/tests/dialects/test_oracle.py
index 7cc4d72..8675086 100644
--- a/tests/dialects/test_oracle.py
+++ b/tests/dialects/test_oracle.py
@@ -1,5 +1,4 @@
from sqlglot import exp, UnsupportedError
-from sqlglot.dialects.oracle import eliminate_join_marks
from tests.dialects.test_dialect import Validator
@@ -7,15 +6,17 @@ class TestOracle(Validator):
dialect = "oracle"
def test_oracle(self):
+ self.validate_identity("1 /* /* */")
self.validate_all(
"SELECT CONNECT_BY_ROOT x y",
write={
- "": "SELECT CONNECT_BY_ROOT(x) AS y",
+ "": "SELECT CONNECT_BY_ROOT x AS y",
"oracle": "SELECT CONNECT_BY_ROOT x AS y",
},
)
- self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.AlterTable)
+ self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.Alter)
+ self.validate_identity("SYSDATE")
self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("REGEXP_REPLACE('source', 'search')")
@@ -45,6 +46,9 @@ class TestOracle(Validator):
self.validate_identity("SELECT * FROM V$SESSION")
self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')")
self.validate_identity(
+ "SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))"
+ )
+ self.validate_identity(
"SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name"
)
self.validate_identity(
@@ -66,6 +70,10 @@ class TestOracle(Validator):
"SELECT MIN(column_name) KEEP (DENSE_RANK FIRST ORDER BY column_name DESC) FROM table_name"
)
self.validate_identity(
+ "SELECT TRUNC(SYSDATE)",
+ "SELECT TRUNC(SYSDATE, 'DD')",
+ )
+ self.validate_identity(
"""SELECT JSON_OBJECT(KEY 'key1' IS emp.column1, KEY 'key2' IS emp.column1) "emp_key" FROM emp""",
"""SELECT JSON_OBJECT('key1': emp.column1, 'key2': emp.column1) AS "emp_key" FROM emp""",
)
@@ -74,10 +82,6 @@ class TestOracle(Validator):
"SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30",
)
self.validate_identity(
- "SYSDATE",
- "CURRENT_TIMESTAMP",
- )
- self.validate_identity(
"SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) "
'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) '
'OVER (PARTITION BY department_id) AS "Best" FROM employees ORDER BY department_id, salary, last_name'
@@ -87,8 +91,7 @@ class TestOracle(Validator):
"SELECT DISTINCT col1, col2 FROM table",
)
self.validate_identity(
- "SELECT * FROM T ORDER BY I OFFSET nvl(:variable1, 10) ROWS FETCH NEXT nvl(:variable2, 10) ROWS ONLY",
- "SELECT * FROM T ORDER BY I OFFSET COALESCE(:variable1, 10) ROWS FETCH NEXT COALESCE(:variable2, 10) ROWS ONLY",
+ "SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY",
)
self.validate_identity(
"SELECT * FROM t SAMPLE (.25)",
@@ -100,6 +103,23 @@ class TestOracle(Validator):
)
self.validate_all(
+ "TRUNC(SYSDATE, 'YEAR')",
+ write={
+ "clickhouse": "DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())",
+ "oracle": "TRUNC(SYSDATE, 'YEAR')",
+ },
+ )
+ self.validate_all(
+ "SELECT * FROM test WHERE MOD(col1, 4) = 3",
+ read={
+ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
+ },
+ write={
+ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
+ "oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3",
+ },
+ )
+ self.validate_all(
"CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
read={
"postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
@@ -191,13 +211,6 @@ class TestOracle(Validator):
},
)
self.validate_all(
- "NVL(NULL, 1)",
- write={
- "": "COALESCE(NULL, 1)",
- "oracle": "COALESCE(NULL, 1)",
- },
- )
- self.validate_all(
"DATE '2022-01-01'",
write={
"": "DATE_STR_TO_DATE('2022-01-01')",
@@ -245,6 +258,25 @@ class TestOracle(Validator):
"duckdb": "SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)",
},
)
+ self.validate_identity(
+ """SELECT * FROM t ORDER BY a ASC NULLS LAST, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC NULLS FIRST""",
+ """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""",
+ )
+ self.validate_all(
+ "NVL(NULL, 1)",
+ write={
+ "oracle": "NVL(NULL, 1)",
+ "": "COALESCE(NULL, 1)",
+ "clickhouse": "COALESCE(NULL, 1)",
+ },
+ )
+ self.validate_all(
+ "TRIM(BOTH 'h' FROM 'Hello World')",
+ write={
+ "oracle": "TRIM(BOTH 'h' FROM 'Hello World')",
+ "clickhouse": "TRIM(BOTH 'h' FROM 'Hello World')",
+ },
+ )
def test_join_marker(self):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")
@@ -332,7 +364,7 @@ FROM warehouses, XMLTABLE(
FROM XMLTABLE(
'ROWSET/ROW'
PASSING
- dbms_xmlgen.GETXMLTYPE('SELECT table_name, column_name, data_default FROM user_tab_columns')
+ dbms_xmlgen.getxmltype('SELECT table_name, column_name, data_default FROM user_tab_columns')
COLUMNS
table_name VARCHAR2(128) PATH '*[1]',
column_name VARCHAR2(128) PATH '*[2]',
@@ -416,59 +448,6 @@ WHERE
for query in (f"{body}{start}{connect}", f"{body}{connect}{start}"):
self.validate_identity(query, pretty, pretty=True)
- def test_eliminate_join_marks(self):
- test_sql = [
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) > 5",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y > 5",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) IS NULL",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y IS NULL",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y IS NULL",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T2.y IS NULL",
- ),
- (
- "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T1.Z > 4",
- "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T1.Z > 4",
- ),
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column",
- ),
- (
- "SELECT * FROM table1, table2, table3, table4 WHERE table1.column = table2.column(+) and table2.column >= table3.column(+) and table1.column = table4.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column LEFT JOIN table3 ON table2.column >= table3.column LEFT JOIN table4 ON table1.column = table4.column",
- ),
- (
- "SELECT * FROM table1, table2, table3 WHERE table1.column = table2.column(+) and table2.column >= table3.column(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column LEFT JOIN table3 ON table2.column >= table3.column",
- ),
- (
- "SELECT table1.id, table2.cloumn1, table3.id FROM table1, table2, (SELECT tableInner1.id FROM tableInner1, tableInner2 WHERE tableInner1.id = tableInner2.id(+)) AS table3 WHERE table1.id = table2.id(+) and table1.id = table3.id(+)",
- "SELECT table1.id, table2.cloumn1, table3.id FROM table1 LEFT JOIN table2 ON table1.id = table2.id LEFT JOIN (SELECT tableInner1.id FROM tableInner1 LEFT JOIN tableInner2 ON tableInner1.id = tableInner2.id) table3 ON table1.id = table3.id",
- ),
- # 2 join marks on one side of predicate
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column1(+) + table2.column2(+)",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column1 + table2.column2",
- ),
- # join mark and expression
- (
- "SELECT * FROM table1, table2 WHERE table1.column = table2.column1(+) + 25",
- "SELECT * FROM table1 LEFT JOIN table2 ON table1.column = table2.column1 + 25",
- ),
- ]
-
- for original, expected in test_sql:
- with self.subTest(original):
- self.assertEqual(
- eliminate_join_marks(self.parse_one(original)).sql(dialect=self.dialect),
- expected,
- )
-
def test_query_restrictions(self):
for restriction in ("READ ONLY", "CHECK OPTION"):
for constraint_name in (" CONSTRAINT name", ""):
@@ -477,3 +456,111 @@ WHERE
self.validate_identity(
f"CREATE VIEW view AS SELECT * FROM tbl WITH {restriction}{constraint_name}"
)
+
+ def test_multitable_inserts(self):
+ self.maxDiff = None
+ self.validate_identity(
+ "INSERT ALL "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'mon', mon_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'tue', tue_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'wed', wed_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'thu', thu_val) "
+ "INTO pivot_dest (id, day, val) VALUES (id, 'fri', fri_val) "
+ "SELECT * "
+ "FROM pivot_source"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id BETWEEN 4 AND 7 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "WHEN id >= 8 THEN "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT ALL "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id BETWEEN 4 AND 7 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "WHEN 1 = 1 THEN "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT FIRST "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "WHEN id <= 5 THEN "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "ELSE "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "INSERT FIRST "
+ "WHEN id <= 3 THEN "
+ "INTO dest_tab1 (id, description) VALUES (id, description) "
+ "ELSE "
+ "INTO dest_tab2 (id, description) VALUES (id, description) "
+ "INTO dest_tab3 (id, description) VALUES (id, description) "
+ "SELECT id, description "
+ "FROM source_tab"
+ )
+
+ self.validate_identity(
+ "/* COMMENT */ INSERT FIRST "
+ "WHEN salary > 4000 THEN INTO emp2 "
+ "WHEN salary > 5000 THEN INTO emp3 "
+ "WHEN salary > 6000 THEN INTO emp4 "
+ "SELECT salary FROM employees"
+ )
+
+ def test_json_functions(self):
+ for format_json in ("", " FORMAT JSON"):
+ for on_cond in (
+ "",
+ " TRUE ON ERROR",
+ " NULL ON EMPTY",
+ " DEFAULT 1 ON ERROR TRUE ON EMPTY",
+ ):
+ for passing in ("", " PASSING 'name1' AS \"var1\", 'name2' AS \"var2\""):
+ with self.subTest("Testing JSON_EXISTS()"):
+ self.validate_identity(
+ f"SELECT * FROM t WHERE JSON_EXISTS(name{format_json}, '$[1].middle'{passing}{on_cond})"
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT purchases_reader_role TO george, maria",
+ "GRANT USAGE ON TYPE price TO finance_role",
+ "GRANT USAGE ON DERBY AGGREGATE types.maxPrice TO sales_role",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing Oracles's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity("GRANT SELECT ON TABLE t TO maria, harry")
+ self.validate_identity("GRANT SELECT ON TABLE s.v TO PUBLIC")
+ self.validate_identity("GRANT SELECT ON TABLE t TO purchases_reader_role")
+ self.validate_identity("GRANT UPDATE, TRIGGER ON TABLE t TO anita, zhi")
+ self.validate_identity("GRANT EXECUTE ON PROCEDURE p TO george")
+ self.validate_identity("GRANT USAGE ON SEQUENCE order_id TO sales_role")
diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py
index 071677d..63266a5 100644
--- a/tests/dialects/test_postgres.py
+++ b/tests/dialects/test_postgres.py
@@ -8,10 +8,15 @@ class TestPostgres(Validator):
dialect = "postgres"
def test_postgres(self):
- self.validate_identity("SHA384(x)")
- self.validate_identity(
- 'CREATE TABLE x (a TEXT COLLATE "de_DE")', "CREATE TABLE x (a TEXT COLLATE de_DE)"
+ self.validate_all(
+ "x ? y",
+ write={
+ "": "JSONB_CONTAINS(x, y)",
+ "postgres": "x ? y",
+ },
)
+
+ self.validate_identity("SHA384(x)")
self.validate_identity("1.x", "1. AS x")
self.validate_identity("|/ x", "SQRT(x)")
self.validate_identity("||/ x", "CBRT(x)")
@@ -23,7 +28,7 @@ class TestPostgres(Validator):
alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION"""
expr = self.parse_one(alter_table_only)
- self.assertIsInstance(expr, exp.AlterTable)
+ self.assertIsInstance(expr, exp.Alter)
self.assertEqual(expr.sql(dialect="postgres"), alter_table_only)
self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')")
@@ -68,10 +73,6 @@ class TestPostgres(Validator):
self.validate_identity("SELECT CURRENT_USER")
self.validate_identity("SELECT * FROM ONLY t1")
self.validate_identity(
- "SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]",
- "SELECT ARRAY[1, 2] @> ARRAY[1, 2, 3]",
- )
- self.validate_identity(
"""UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)"""
)
self.validate_identity(
@@ -128,6 +129,14 @@ class TestPostgres(Validator):
"ORDER BY 2, 3"
)
self.validate_identity(
+ "/*+ some comment*/ SELECT b.foo, b.bar FROM baz AS b",
+ "/* + some comment */ SELECT b.foo, b.bar FROM baz AS b",
+ )
+ self.validate_identity(
+ "SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]",
+ "SELECT ARRAY[1, 2] @> ARRAY[1, 2, 3]",
+ )
+ self.validate_identity(
"SELECT ARRAY[]::INT[] AS foo",
"SELECT CAST(ARRAY[] AS INT[]) AS foo",
)
@@ -540,37 +549,53 @@ class TestPostgres(Validator):
},
)
self.validate_all(
- "GENERATE_SERIES(a, b, ' 2 days ')",
+ "SELECT GENERATE_SERIES(1, 5)",
write={
- "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
- "presto": "SEQUENCE(a, b, INTERVAL '2' DAY)",
- "trino": "SEQUENCE(a, b, INTERVAL '2' DAY)",
+ "bigquery": UnsupportedError,
+ "postgres": "SELECT GENERATE_SERIES(1, 5)",
},
)
self.validate_all(
- "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
+ "WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
write={
- "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
- "presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
- "trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
+ "duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
+ "postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
+ },
+ )
+ self.validate_all(
+ "GENERATE_SERIES(a, b, ' 2 days ')",
+ write={
+ "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
+ "presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
+ "trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
},
)
self.validate_all(
- "GENERATE_SERIES(a, b)",
+ "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
write={
- "postgres": "GENERATE_SERIES(a, b)",
- "presto": "SEQUENCE(a, b)",
- "trino": "SEQUENCE(a, b)",
- "tsql": "GENERATE_SERIES(a, b)",
+ "databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
+ "presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
+ "spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
+ "trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
},
)
self.validate_all(
- "GENERATE_SERIES(a, b)",
+ "SELECT * FROM GENERATE_SERIES(a, b)",
read={
- "postgres": "GENERATE_SERIES(a, b)",
- "presto": "SEQUENCE(a, b)",
- "trino": "SEQUENCE(a, b)",
- "tsql": "GENERATE_SERIES(a, b)",
+ "tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
+ },
+ write={
+ "databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "postgres": "SELECT * FROM GENERATE_SERIES(a, b)",
+ "presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
+ "spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
+ "trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
+ "tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
},
)
self.validate_all(
@@ -651,17 +676,25 @@ class TestPostgres(Validator):
},
)
self.validate_all(
- """'{"a":1,"b":2}'::json->'b'""",
+ "TRIM(BOTH 'as' FROM 'as string as')",
write={
- "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
- "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
+ "postgres": "TRIM(BOTH 'as' FROM 'as string as')",
+ "spark": "TRIM(BOTH 'as' FROM 'as string as')",
},
)
+ self.validate_identity(
+ """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")""",
+ """SELECT LTRIM(' XXX ' COLLATE "de_DE")""",
+ )
+ self.validate_identity(
+ """SELECT TRIM(TRAILING ' XXX ' COLLATE "de_DE")""",
+ """SELECT RTRIM(' XXX ' COLLATE "de_DE")""",
+ )
self.validate_all(
- "TRIM(BOTH 'as' FROM 'as string as')",
+ """'{"a":1,"b":2}'::json->'b'""",
write={
- "postgres": "TRIM(BOTH 'as' FROM 'as string as')",
- "spark": "TRIM(BOTH 'as' FROM 'as string as')",
+ "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
+ "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
},
)
self.validate_all(
@@ -747,6 +780,29 @@ class TestPostgres(Validator):
},
)
+ self.validate_all(
+ "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')",
+ write={
+ "duckdb": "SELECT CAST(STRPTIME('01/01/2000', '%m/%d/%Y') AS DATE)",
+ "postgres": "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')",
+ },
+ )
+
+ self.validate_identity(
+ 'SELECT js, js IS JSON AS "json?", js IS JSON VALUE AS "scalar?", js IS JSON SCALAR AS "scalar?", js IS JSON OBJECT AS "object?", js IS JSON ARRAY AS "array?" FROM t'
+ )
+ self.validate_identity(
+ 'SELECT js, js IS JSON ARRAY WITH UNIQUE KEYS AS "array w. UK?", js IS JSON ARRAY WITHOUT UNIQUE KEYS AS "array w/o UK?", js IS JSON ARRAY UNIQUE KEYS AS "array w UK 2?" FROM t'
+ )
+ self.validate_identity(
+ "MERGE INTO target_table USING source_table AS source ON target.id = source.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN DO NOTHING RETURNING MERGE_ACTION(), *"
+ )
+ self.validate_identity(
+ "SELECT 1 FROM ((VALUES (1)) AS vals(id) LEFT OUTER JOIN tbl ON vals.id = tbl.id)"
+ )
+ self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1)")
+ self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1 FOR 1)")
+
def test_ddl(self):
# Checks that user-defined types are parsed into DataType instead of Identifier
self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(
@@ -763,6 +819,9 @@ class TestPostgres(Validator):
cdef.args["kind"].assert_is(exp.DataType)
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)")
+ self.validate_identity('ALTER INDEX "IX_Ratings_Column1" RENAME TO "IX_Ratings_Column2"')
+ self.validate_identity('CREATE TABLE x (a TEXT COLLATE "de_DE")')
+ self.validate_identity('CREATE TABLE x (a TEXT COLLATE pg_catalog."default")')
self.validate_identity("CREATE TABLE t (col INT[3][5])")
self.validate_identity("CREATE TABLE t (col INT[3])")
self.validate_identity("CREATE INDEX IF NOT EXISTS ON t(c)")
@@ -796,6 +855,9 @@ class TestPostgres(Validator):
self.validate_identity("ALTER TABLE t1 SET TABLESPACE tablespace")
self.validate_identity("ALTER TABLE t1 SET (fillfactor = 5, autovacuum_enabled = TRUE)")
self.validate_identity(
+ "ALTER TABLE tested_table ADD CONSTRAINT unique_example UNIQUE (column_name) NOT VALID"
+ )
+ self.validate_identity(
"CREATE FUNCTION pymax(a INT, b INT) RETURNS INT LANGUAGE plpython3u AS $$\n if a > b:\n return a\n return b\n$$",
)
self.validate_identity(
@@ -965,6 +1027,42 @@ class TestPostgres(Validator):
},
)
+ self.validate_identity("CREATE TABLE tbl (col INT UNIQUE NULLS NOT DISTINCT DEFAULT 9.99)")
+ self.validate_identity("CREATE TABLE tbl (col UUID UNIQUE DEFAULT GEN_RANDOM_UUID())")
+ self.validate_identity("CREATE TABLE tbl (col UUID, UNIQUE NULLS NOT DISTINCT (col))")
+
+ self.validate_identity("CREATE INDEX CONCURRENTLY ix_table_id ON tbl USING btree(id)")
+ self.validate_identity(
+ "CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_table_id ON tbl USING btree(id)"
+ )
+ self.validate_identity("DROP INDEX ix_table_id")
+ self.validate_identity("DROP INDEX IF EXISTS ix_table_id")
+ self.validate_identity("DROP INDEX CONCURRENTLY ix_table_id")
+ self.validate_identity("DROP INDEX CONCURRENTLY IF EXISTS ix_table_id")
+
+ self.validate_identity(
+ """
+ CREATE TABLE IF NOT EXISTS public.rental
+ (
+ inventory_id INT NOT NULL,
+ CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id)
+ REFERENCES public.customer (customer_id) MATCH FULL
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id)
+ REFERENCES public.inventory (inventory_id) MATCH PARTIAL
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id)
+ REFERENCES public.staff (staff_id) MATCH SIMPLE
+ ON UPDATE CASCADE
+ ON DELETE RESTRICT,
+ INITIALLY IMMEDIATE
+ )
+ """,
+ "CREATE TABLE IF NOT EXISTS public.rental (inventory_id INT NOT NULL, CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id) REFERENCES public.customer (customer_id) MATCH FULL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id) REFERENCES public.inventory (inventory_id) MATCH PARTIAL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id) REFERENCES public.staff (staff_id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE RESTRICT, INITIALLY IMMEDIATE)",
+ )
+
with self.assertRaises(ParseError):
transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres")
with self.assertRaises(ParseError):
@@ -1011,10 +1109,10 @@ class TestPostgres(Validator):
self.assertEqual(
cm.output,
[
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (-1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
],
)
@@ -1118,3 +1216,12 @@ CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(boxcrate AS JSON)) AS x(tbox)
CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(JSON_EXTRACT_PATH(tbox, 'boxes') AS JSON)) AS y(boxes)"""
self.validate_all(expected_postgres, read={"trino": trino_input}, pretty=True)
+
+ def test_rows_from(self):
+ self.validate_identity("""SELECT * FROM ROWS FROM (FUNC1(col1, col2))""")
+ self.validate_identity(
+ """SELECT * FROM ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT), FUNC2(col2) AS alias2("col2" INT)) WITH ORDINALITY"""
+ )
+ self.validate_identity(
+ """SELECT * FROM table1, ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT)) WITH ORDINALITY AS alias3("col3" INT, "col4" TEXT)"""
+ )
diff --git a/tests/dialects/test_presto.py b/tests/dialects/test_presto.py
index ebb270a..3d5fbfe 100644
--- a/tests/dialects/test_presto.py
+++ b/tests/dialects/test_presto.py
@@ -14,6 +14,13 @@ class TestPresto(Validator):
self.validate_identity("CAST(x AS HYPERLOGLOG)")
self.validate_all(
+ "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
+ write={
+ "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)",
+ "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
+ },
+ )
+ self.validate_all(
"CAST(x AS INTERVAL YEAR TO MONTH)",
write={
"oracle": "CAST(x AS INTERVAL YEAR TO MONTH)",
@@ -151,8 +158,8 @@ class TestPresto(Validator):
write={
"duckdb": "STR_SPLIT(x, 'a.')",
"presto": "SPLIT(x, 'a.')",
- "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.'))",
- "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.'))",
+ "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
+ "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
},
)
self.validate_all(
@@ -269,10 +276,19 @@ class TestPresto(Validator):
self.validate_all(
"DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')",
write={
- "duckdb": "STRPTIME(SUBSTR(x, 1, 10), '%Y-%m-%d')",
- "presto": "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')",
- "hive": "CAST(SUBSTR(x, 1, 10) AS TIMESTAMP)",
- "spark": "TO_TIMESTAMP(SUBSTR(x, 1, 10), 'yyyy-MM-dd')",
+ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
+ "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
+ },
+ )
+ self.validate_all(
+ "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ write={
+ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
+ "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
+ "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
},
)
self.validate_all(
@@ -322,11 +338,20 @@ class TestPresto(Validator):
},
)
self.validate_all(
- "DAY_OF_WEEK(timestamp '2012-08-08 01:00:00')",
- write={
+ "((DAY_OF_WEEK(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP) AS DATE)) % 7) + 1)",
+ read={
"spark": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ },
+ )
+ self.validate_all(
+ "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ read={
+ "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ },
+ write={
+ "spark": "((DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP)) % 7) + 1)",
"presto": "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
- "duckdb": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
+ "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
},
)
@@ -405,6 +430,27 @@ class TestPresto(Validator):
)
self.validate_identity("DATE_ADD('DAY', 1, y)")
+ self.validate_all(
+ "SELECT DATE_ADD('MINUTE', 30, col)",
+ write={
+ "presto": "SELECT DATE_ADD('MINUTE', 30, col)",
+ "trino": "SELECT DATE_ADD('MINUTE', 30, col)",
+ },
+ )
+
+ self.validate_identity("DATE_ADD('DAY', FLOOR(5), y)")
+ self.validate_identity(
+ """SELECT DATE_ADD('DAY', MOD(5, 2.5), y), DATE_ADD('DAY', CEIL(5.5), y)""",
+ """SELECT DATE_ADD('DAY', CAST(5 % 2.5 AS BIGINT), y), DATE_ADD('DAY', CAST(CEIL(5.5) AS BIGINT), y)""",
+ )
+
+ self.validate_all(
+ "DATE_ADD('MINUTE', CAST(FLOOR(CAST(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP) AS DOUBLE) / NULLIF(30, 0)) * 30 AS BIGINT), col)",
+ read={
+ "spark": "TIMESTAMPADD(MINUTE, FLOOR(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP)/30)*30, col)",
+ },
+ )
+
def test_ddl(self):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
@@ -494,6 +540,9 @@ class TestPresto(Validator):
},
)
+ self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY DEFINER AS SELECT id FROM t""")
+ self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY INVOKER AS SELECT id FROM t""")
+
def test_quotes(self):
self.validate_all(
"''''",
@@ -581,6 +630,13 @@ class TestPresto(Validator):
)
def test_presto(self):
+ self.assertEqual(
+ exp.func("md5", exp.func("concat", exp.cast("x", "text"), exp.Literal.string("s"))).sql(
+ dialect="presto"
+ ),
+ "LOWER(TO_HEX(MD5(TO_UTF8(CONCAT(CAST(x AS VARCHAR), CAST('s' AS VARCHAR))))))",
+ )
+
with self.assertLogs(helper_logger):
self.validate_all(
"SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table",
@@ -599,6 +655,7 @@ class TestPresto(Validator):
},
)
+ self.validate_identity("SELECT a FROM t GROUP BY a, ROLLUP (b), ROLLUP (c), ROLLUP (d)")
self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)")
self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)")
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
@@ -680,9 +737,6 @@ class TestPresto(Validator):
)
self.validate_all(
"SELECT ROW(1, 2)",
- read={
- "spark": "SELECT STRUCT(1, 2)",
- },
write={
"presto": "SELECT ROW(1, 2)",
"spark": "SELECT STRUCT(1, 2)",
@@ -801,12 +855,6 @@ class TestPresto(Validator):
},
)
self.validate_all(
- "SELECT a FROM t GROUP BY a, ROLLUP(b), ROLLUP(c), ROLLUP(d)",
- write={
- "presto": "SELECT a FROM t GROUP BY a, ROLLUP (b, c, d)",
- },
- )
- self.validate_all(
'SELECT a."b" FROM "foo"',
write={
"duckdb": 'SELECT a."b" FROM "foo"',
@@ -927,8 +975,8 @@ class TestPresto(Validator):
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14'])",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x",
- "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x",
+ "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
},
)
self.validate_all(
@@ -936,8 +984,8 @@ class TestPresto(Validator):
write={
"bigquery": "SELECT * FROM UNNEST(['7', '14']) AS y",
"presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)",
- "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)",
- "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS x(y)",
+ "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
+ "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
},
)
self.validate_all(
@@ -995,6 +1043,25 @@ class TestPresto(Validator):
"spark": "SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[[1, 2, 3]]', SCHEMA_OF_JSON('[[1, 2, 3]]'))), '^.(.*).$', 1)",
},
)
+ self.validate_all(
+ "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ read={
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
+ },
+ write={
+ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
+ "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
+ "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
+ },
+ )
def test_encode_decode(self):
self.validate_identity("FROM_UTF8(x, y)")
@@ -1192,3 +1259,18 @@ MATCH_RECOGNIZE (
"starrocks": "SIGN(x)",
},
)
+
+ def test_json_vs_row_extract(self):
+ for dialect in ("trino", "presto"):
+ s = parse_one('SELECT col:x:y."special string"', read="snowflake")
+
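+ # sqlglot accepts inline dialect settings of the form '<dialect>, key=value', as used below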
+ dialect_json_extract_setting = f"{dialect}, variant_extract_is_json_extract=True"
+ dialect_row_access_setting = f"{dialect}, variant_extract_is_json_extract=False"
+
+ # By default, Snowflake VARIANT will generate JSON_EXTRACT() in Presto/Trino
+ json_extract_result = """SELECT JSON_EXTRACT(col, '$.x.y["special string"]')"""
+ self.assertEqual(s.sql(dialect), json_extract_result)
+ self.assertEqual(s.sql(dialect_json_extract_setting), json_extract_result)
+
+ # If the setting is overridden to False, then generate ROW access (dot notation)
+ self.assertEqual(s.sql(dialect_row_access_setting), 'SELECT col.x.y."special string"')
diff --git a/tests/dialects/test_prql.py b/tests/dialects/test_prql.py
index 5b438f1..2e35bb2 100644
--- a/tests/dialects/test_prql.py
+++ b/tests/dialects/test_prql.py
@@ -5,77 +5,165 @@ class TestPRQL(Validator):
dialect = "prql"
def test_prql(self):
- self.validate_identity("from x", "SELECT * FROM x")
- self.validate_identity("from x derive a + 1", "SELECT *, a + 1 FROM x")
- self.validate_identity("from x derive x = a + 1", "SELECT *, a + 1 AS x FROM x")
- self.validate_identity("from x derive {a + 1}", "SELECT *, a + 1 FROM x")
- self.validate_identity("from x derive {x = a + 1, b}", "SELECT *, a + 1 AS x, b FROM x")
- self.validate_identity(
- "from x derive {x = a + 1, b} select {y = x, 2}", "SELECT a + 1 AS y, 2 FROM x"
- )
- self.validate_identity("from x take 10", "SELECT * FROM x LIMIT 10")
- self.validate_identity("from x take 10 take 5", "SELECT * FROM x LIMIT 5")
- self.validate_identity("from x filter age > 25", "SELECT * FROM x WHERE age > 25")
- self.validate_identity(
+ self.validate_all(
+ "from x",
+ write={
+ "": "SELECT * FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive a + 1",
+ write={
+ "": "SELECT *, a + 1 FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive x = a + 1",
+ write={
+ "": "SELECT *, a + 1 AS x FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {a + 1}",
+ write={
+ "": "SELECT *, a + 1 FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {x = a + 1, b}",
+ write={
+ "": "SELECT *, a + 1 AS x, b FROM x",
+ },
+ )
+ self.validate_all(
+ "from x derive {x = a + 1, b} select {y = x, 2}",
+ write={"": "SELECT a + 1 AS y, 2 FROM x"},
+ )
+ self.validate_all(
+ "from x take 10",
+ write={
+ "": "SELECT * FROM x LIMIT 10",
+ },
+ )
+ self.validate_all(
+ "from x take 10 take 5",
+ write={
+ "": "SELECT * FROM x LIMIT 5",
+ },
+ )
+ self.validate_all(
+ "from x filter age > 25",
+ write={
+ "": "SELECT * FROM x WHERE age > 25",
+ },
+ )
+ self.validate_all(
"from x derive {x = a + 1, b} filter age > 25",
- "SELECT *, a + 1 AS x, b FROM x WHERE age > 25",
+ write={
+ "": "SELECT *, a + 1 AS x, b FROM x WHERE age > 25",
+ },
)
- self.validate_identity("from x filter dept != 'IT'", "SELECT * FROM x WHERE dept <> 'IT'")
- self.validate_identity(
- "from x filter p == 'product' select { a, b }", "SELECT a, b FROM x WHERE p = 'product'"
+ self.validate_all(
+ "from x filter dept != 'IT'",
+ write={
+ "": "SELECT * FROM x WHERE dept <> 'IT'",
+ },
)
- self.validate_identity(
- "from x filter age > 25 filter age < 27", "SELECT * FROM x WHERE age > 25 AND age < 27"
+ self.validate_all(
+ "from x filter p == 'product' select { a, b }",
+ write={"": "SELECT a, b FROM x WHERE p = 'product'"},
)
- self.validate_identity(
- "from x filter (age > 25 && age < 27)", "SELECT * FROM x WHERE (age > 25 AND age < 27)"
+ self.validate_all(
+ "from x filter age > 25 filter age < 27",
+ write={"": "SELECT * FROM x WHERE age > 25 AND age < 27"},
)
- self.validate_identity(
- "from x filter (age > 25 || age < 27)", "SELECT * FROM x WHERE (age > 25 OR age < 27)"
+ self.validate_all(
+ "from x filter (age > 25 && age < 27)",
+ write={"": "SELECT * FROM x WHERE (age > 25 AND age < 27)"},
)
- self.validate_identity(
+ self.validate_all(
+ "from x filter (age > 25 || age < 27)",
+ write={"": "SELECT * FROM x WHERE (age > 25 OR age < 27)"},
+ )
+ self.validate_all(
"from x filter (age > 25 || age < 22) filter age > 26 filter age < 27",
- "SELECT * FROM x WHERE ((age > 25 OR age < 22) AND age > 26) AND age < 27",
+ write={
+ "": "SELECT * FROM x WHERE ((age > 25 OR age < 22) AND age > 26) AND age < 27",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort age",
- "SELECT * FROM x ORDER BY age",
+ write={
+ "": "SELECT * FROM x ORDER BY age",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {-age}",
- "SELECT * FROM x ORDER BY age DESC",
+ write={
+ "": "SELECT * FROM x ORDER BY age DESC",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {age, name}",
- "SELECT * FROM x ORDER BY age, name",
+ write={
+ "": "SELECT * FROM x ORDER BY age, name",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x sort {-age, +name}",
- "SELECT * FROM x ORDER BY age DESC, name",
- )
- self.validate_identity("from x append y", "SELECT * FROM x UNION ALL SELECT * FROM y")
- self.validate_identity("from x remove y", "SELECT * FROM x EXCEPT ALL SELECT * FROM y")
- self.validate_identity(
- "from x intersect y", "SELECT * FROM x INTERSECT ALL SELECT * FROM y"
- )
- self.validate_identity(
+ write={
+ "": "SELECT * FROM x ORDER BY age DESC, name",
+ },
+ )
+ self.validate_all(
+ "from x append y",
+ write={
+ "": "SELECT * FROM x UNION ALL SELECT * FROM y",
+ },
+ )
+ self.validate_all(
+ "from x remove y",
+ write={
+ "": "SELECT * FROM x EXCEPT ALL SELECT * FROM y",
+ },
+ )
+ self.validate_all(
+ "from x intersect y",
+ write={"": "SELECT * FROM x INTERSECT ALL SELECT * FROM y"},
+ )
+ self.validate_all(
"from x filter a == null filter null != b",
- "SELECT * FROM x WHERE a IS NULL AND NOT b IS NULL",
+ write={
+ "": "SELECT * FROM x WHERE a IS NULL AND NOT b IS NULL",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from x filter (a > 1 || null != b || c != null)",
- "SELECT * FROM x WHERE (a > 1 OR NOT b IS NULL OR NOT c IS NULL)",
+ write={
+ "": "SELECT * FROM x WHERE (a > 1 OR NOT b IS NULL OR NOT c IS NULL)",
+ },
+ )
+ self.validate_all(
+ "from a aggregate { average x }",
+ write={
+ "": "SELECT AVG(x) FROM a",
+ },
)
- self.validate_identity("from a aggregate { average x }", "SELECT AVG(x) FROM a")
- self.validate_identity(
+ self.validate_all(
"from a aggregate { average x, min y, ct = sum z }",
- "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) AS ct FROM a",
+ write={
+ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) AS ct FROM a",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from a aggregate { average x, min y, sum z }",
- "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) FROM a",
+ write={
+ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) FROM a",
+ },
)
- self.validate_identity(
+ self.validate_all(
"from a aggregate { min y, b = stddev x, max z }",
- "SELECT MIN(y), STDDEV(x) AS b, MAX(z) FROM a",
+ write={
+ "": "SELECT MIN(y), STDDEV(x) AS b, MAX(z) FROM a",
+ },
)
diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py
index 69793c7..6f561da 100644
--- a/tests/dialects/test_redshift.py
+++ b/tests/dialects/test_redshift.py
@@ -6,7 +6,6 @@ class TestRedshift(Validator):
dialect = "redshift"
def test_redshift(self):
- self.validate_identity("1 div", "1 AS div")
self.validate_all(
"SELECT SPLIT_TO_ARRAY('12,345,6789')",
write={
@@ -28,7 +27,7 @@ class TestRedshift(Validator):
"""SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""",
write={
"bigquery": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
- "databricks": """SELECT GET_JSON_OBJECT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
+ "databricks": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}':farm.barn.color""",
"duckdb": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}' ->> '$.farm.barn.color'""",
"postgres": """SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""",
"presto": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""",
@@ -228,7 +227,7 @@ class TestRedshift(Validator):
"drill": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"hive": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"mysql": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
- "oracle": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) _t WHERE _row_number = 1",
+ "oracle": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) _t WHERE _row_number = 1",
"presto": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"redshift": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
"snowflake": "SELECT a, b FROM (SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1",
@@ -259,6 +258,12 @@ class TestRedshift(Validator):
"postgres": "COALESCE(a, b, c, d)",
},
)
+
+ self.validate_identity(
+ "DATEDIFF(days, a, b)",
+ "DATEDIFF(DAY, a, b)",
+ )
+
self.validate_all(
"DATEDIFF('day', a, b)",
write={
@@ -300,7 +305,16 @@ class TestRedshift(Validator):
},
)
+ self.validate_all(
+ "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)",
+ write={
+ "snowflake": "SELECT DATE_PART(EPOCH, CURRENT_DATE)",
+ "redshift": "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)",
+ },
+ )
+
def test_identity(self):
+ self.validate_identity("1 div", "1 AS div")
self.validate_identity("LISTAGG(DISTINCT foo, ', ')")
self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1")
self.validate_identity("SELECT DATEADD(DAY, 1, 'today')")
@@ -324,6 +338,10 @@ class TestRedshift(Validator):
"""SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)"""
)
self.validate_identity(
+ 'DATE_PART(year, "somecol")',
+ 'EXTRACT(year FROM "somecol")',
+ ).this.assert_is(exp.Var)
+ self.validate_identity(
"SELECT CONCAT('abc', 'def')",
"SELECT 'abc' || 'def'",
)
@@ -416,6 +434,14 @@ ORDER BY
)
self.validate_identity("SELECT JSON_PARSE('[]')")
+ self.validate_identity("SELECT ARRAY(1, 2, 3)")
+ self.validate_identity("SELECT ARRAY[1, 2, 3]")
+
+ self.validate_identity(
+ """SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""",
+ """SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""",
+ )
+
def test_values(self):
# Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError
values = [str(v) for v in range(0, 10000)]
@@ -594,3 +620,42 @@ FROM (
"select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)",
"SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)",
)
+
+ def test_time(self):
+ self.validate_all(
+ "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
+ write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"},
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT SELECT ON ALL TABLES IN SCHEMA qa_tickit TO fred",
+ "GRANT USAGE ON DATASHARE salesshare TO NAMESPACE '13b8833d-17c6-4f16-8fe4-1a018f5ed00d'",
+ "GRANT USAGE FOR SCHEMAS IN DATABASE Sales_db TO ROLE Sales",
+ "GRANT EXECUTE FOR FUNCTIONS IN SCHEMA Sales_schema TO bob",
+ "GRANT SELECT FOR TABLES IN DATABASE Sales_db TO alice WITH GRANT OPTION",
+ "GRANT ALL FOR TABLES IN SCHEMA ShareSchema DATABASE ShareDb TO ROLE Sales",
+ "GRANT ASSUMEROLE ON 'arn:aws:iam::123456789012:role/Redshift-Exfunc' TO reg_user1 FOR EXTERNAL FUNCTION",
+ "GRANT ROLE sample_role1 TO ROLE sample_role2",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing Redshift's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity("GRANT SELECT ON TABLE sales TO fred")
+ self.validate_identity("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users")
+ self.validate_identity("GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users")
+ self.validate_identity(
+ "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users"
+ )
+ self.validate_identity("GRANT ALL ON view_date TO view_user")
+ self.validate_identity(
+ "GRANT SELECT(cust_name, cust_phone), UPDATE(cust_contact_preference) ON cust_profile TO GROUP sales_group"
+ )
+ self.validate_identity(
+ "GRANT ALL(cust_name, cust_phone, cust_contact_preference) ON cust_profile TO GROUP sales_admin"
+ )
+ self.validate_identity("GRANT USAGE ON DATABASE sales_db TO Bob")
+ self.validate_identity("GRANT USAGE ON SCHEMA sales_schema TO ROLE Analyst_role")
+ self.validate_identity("GRANT SELECT ON sales_db.sales_schema.tickit_sales_redshift TO Bob")
diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py
index 1286436..6cde86b 100644
--- a/tests/dialects/test_snowflake.py
+++ b/tests/dialects/test_snowflake.py
@@ -11,17 +11,10 @@ class TestSnowflake(Validator):
dialect = "snowflake"
def test_snowflake(self):
- self.validate_identity(
- "transform(x, a int -> a + a + 1)",
- "TRANSFORM(x, a -> CAST(a AS INT) + CAST(a AS INT) + 1)",
- )
-
- self.validate_all(
- "ARRAY_CONSTRUCT_COMPACT(1, null, 2)",
- write={
- "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))",
- "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, NULL, 2)",
- },
+ self.assertEqual(
+ # Ensures we don't fail when generating ParseJSON with the `safe` arg set to `True`
+ self.validate_identity("""SELECT TRY_PARSE_JSON('{"x: 1}')""").sql(),
+ """SELECT PARSE_JSON('{"x: 1}')""",
)
expr = parse_one("SELECT APPROX_TOP_K(C4, 3, 5) FROM t")
@@ -49,6 +42,9 @@ WHERE
)""",
)
+ self.validate_identity("exclude := [foo]")
+ self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))")
+ self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias")
self.validate_identity("SELECT number").selects[0].assert_is(exp.Column)
self.validate_identity("INTERVAL '4 years, 5 months, 3 hours'")
self.validate_identity("ALTER TABLE table1 CLUSTER BY (name DESC)")
@@ -84,7 +80,6 @@ WHERE
self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT foo FROM IDENTIFIER('x')")
self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT IDENTIFIER('foo') FROM x")
self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')")
- self.validate_identity("CAST(x AS GEOMETRY)")
self.validate_identity("OBJECT_CONSTRUCT(*)")
self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'")
self.validate_identity("SELECT HLL(*)")
@@ -101,6 +96,22 @@ WHERE
self.validate_identity("ALTER TABLE a SWAP WITH b")
self.validate_identity("SELECT MATCH_CONDITION")
self.validate_identity("SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t")
+ self.validate_identity("1 /* /* */")
+ self.validate_identity(
+ "SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid"
+ )
+ self.validate_identity(
+ "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY)) ORDER BY empid"
+ )
self.validate_identity(
"MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)"
)
@@ -114,6 +125,30 @@ WHERE
"SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
)
self.validate_identity(
+ "CAST(x AS GEOGRAPHY)",
+ "TO_GEOGRAPHY(x)",
+ )
+ self.validate_identity(
+ "CAST(x AS GEOMETRY)",
+ "TO_GEOMETRY(x)",
+ )
+ self.validate_identity(
+ "transform(x, a int -> a + a + 1)",
+ "TRANSFORM(x, a -> CAST(a AS INT) + CAST(a AS INT) + 1)",
+ )
+ self.validate_identity(
+ "SELECT * FROM s WHERE c NOT IN (1, 2, 3)",
+ "SELECT * FROM s WHERE NOT c IN (1, 2, 3)",
+ )
+ self.validate_identity(
+ "SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)",
+ "SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)",
+ )
+ self.validate_identity(
+ "SELECT * FROM t1 INNER JOIN t2 USING (t1.col)",
+ "SELECT * FROM t1 INNER JOIN t2 USING (col)",
+ )
+ self.validate_identity(
"CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)",
"CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)",
)
@@ -183,18 +218,6 @@ WHERE
"""SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""",
)
self.validate_identity(
- "SELECT * FROM foo at",
- "SELECT * FROM foo AS at",
- )
- self.validate_identity(
- "SELECT * FROM foo before",
- "SELECT * FROM foo AS before",
- )
- self.validate_identity(
- "SELECT * FROM foo at (col)",
- "SELECT * FROM foo AS at(col)",
- )
- self.validate_identity(
"SELECT * FROM unnest(x) with ordinality",
"SELECT * FROM TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this)",
)
@@ -283,6 +306,13 @@ WHERE
)
self.validate_all(
+ "ARRAY_CONSTRUCT_COMPACT(1, null, 2)",
+ write={
+ "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))",
+ "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, NULL, 2)",
+ },
+ )
+ self.validate_all(
"OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)",
read={
"bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])",
@@ -337,7 +367,7 @@ WHERE
"""SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""",
write={
"bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""",
- "databricks": """SELECT GET_JSON_OBJECT('{"fruit":"banana"}', '$.fruit')""",
+ "databricks": """SELECT '{"fruit":"banana"}':fruit""",
"duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""",
"mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""",
"presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""",
@@ -572,12 +602,12 @@ WHERE
self.validate_all(
"DIV0(foo, bar)",
write={
- "snowflake": "IFF(bar = 0, 0, foo / bar)",
- "sqlite": "IIF(bar = 0, 0, CAST(foo AS REAL) / bar)",
- "presto": "IF(bar = 0, 0, CAST(foo AS DOUBLE) / bar)",
- "spark": "IF(bar = 0, 0, foo / bar)",
- "hive": "IF(bar = 0, 0, foo / bar)",
- "duckdb": "CASE WHEN bar = 0 THEN 0 ELSE foo / bar END",
+ "snowflake": "IFF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "sqlite": "IIF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS REAL) / bar)",
+ "presto": "IF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS DOUBLE) / bar)",
+ "spark": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "hive": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
+ "duckdb": "CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END",
},
)
self.validate_all(
@@ -725,6 +755,8 @@ WHERE
write={
"spark": "SELECT COLLECT_LIST(DISTINCT a)",
"snowflake": "SELECT ARRAY_AGG(DISTINCT a)",
+ "duckdb": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)",
+ "presto": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)",
},
)
self.validate_all(
@@ -831,6 +863,71 @@ WHERE
},
)
+ self.validate_all(
+ "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ write={
+ "snowflake": "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_INSERT({'key5': 'value5'}, key1 := 5), key2 := 2.2), key3 := 'value3')",
+ },
+ )
+
+ self.validate_all(
+ "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ write={
+ "snowflake": "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')",
+ "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_PACK(key1 := 5), key2 := 2.2), key3 := 'value3')",
+ },
+ )
+
+ self.validate_identity(
+ """SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""",
+ """SELECT CAST(['foo'] AS VARIANT)[0]""",
+ )
+
+ self.validate_all(
+ "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ write={
+ "snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ "redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
+ },
+ )
+
+ self.validate_all(
+ "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ write={
+ "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
+ "mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')",
+ "duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'",
+ },
+ )
+
+ self.validate_identity(
+ "SELECT UUID_STRING(), UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')"
+ )
+
+ self.validate_all(
+ "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
+ read={
+ "snowflake": "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
+ },
+ write={
+ "hive": "UUID()",
+ "spark2": "UUID()",
+ "spark": "UUID()",
+ "databricks": "UUID()",
+ "duckdb": "UUID()",
+ "presto": "UUID()",
+ "trino": "UUID()",
+ "postgres": "GEN_RANDOM_UUID()",
+ "bigquery": "GENERATE_UUID()",
+ },
+ )
+
def test_null_treatment(self):
self.validate_all(
r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1",
@@ -903,6 +1000,11 @@ WHERE
"SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla",
)
+ self.validate_identity(
+ "SELECT * FROM @test.public.thing/location/somefile.csv( FILE_FORMAT => 'fmt' )",
+ "SELECT * FROM @test.public.thing/location/somefile.csv (FILE_FORMAT => 'fmt')",
+ )
+
def test_sample(self):
self.validate_identity("SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)")
self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)")
@@ -1196,6 +1298,17 @@ WHERE
"SELECT oldt.*, newt.* FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL",
)
+ # Make sure that the historical data keywords can still be used as aliases
+ for historical_data_prefix in ("AT", "BEFORE", "END", "CHANGES"):
+ for schema_suffix in ("", "(col)"):
+ with self.subTest(
+ f"Testing historical data prefix alias: {historical_data_prefix}{schema_suffix}"
+ ):
+ self.validate_identity(
+ f"SELECT * FROM foo {historical_data_prefix}{schema_suffix}",
+ f"SELECT * FROM foo AS {historical_data_prefix}{schema_suffix}",
+ )
+
def test_ddl(self):
for constraint_prefix in ("WITH ", ""):
with self.subTest(f"Constraint prefix: {constraint_prefix}"):
@@ -1216,6 +1329,7 @@ WHERE
"CREATE TABLE t (id INT TAG (key1='value_1', key2='value_2'))",
)
+ self.validate_identity("CREATE SECURE VIEW table1 AS (SELECT a FROM table2)")
self.validate_identity(
"""create external table et2(
col1 date as (parse_json(metadata$external_table_partition):COL1::date),
@@ -1241,6 +1355,9 @@ WHERE
"CREATE OR REPLACE TAG IF NOT EXISTS cost_center COMMENT='cost_center tag'"
).this.assert_is(exp.Identifier)
self.validate_identity(
+ "CREATE DYNAMIC TABLE product (pre_tax_profit, taxes, after_tax_profit) TARGET_LAG='20 minutes' WAREHOUSE=mywh AS SELECT revenue - cost, (revenue - cost) * tax_rate, (revenue - cost) * (1.0 - tax_rate) FROM staging_table"
+ )
+ self.validate_identity(
"ALTER TABLE db_name.schmaName.tblName ADD COLUMN COLUMN_1 VARCHAR NOT NULL TAG (key1='value_1')"
)
self.validate_identity(
@@ -1330,6 +1447,12 @@ WHERE
},
)
+ self.assertIsNotNone(
+ self.validate_identity("CREATE TABLE foo (bar INT AS (foo))").find(
+ exp.TransformColumnConstraint
+ )
+ )
+
def test_user_defined_functions(self):
self.validate_all(
"CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$",
@@ -1608,16 +1731,27 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"REGEXP_SUBSTR(subject, pattern)",
read={
"bigquery": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_EXTRACT(subject, pattern)",
+ },
+ write={
+ "bigquery": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_SUBSTR(subject, pattern)",
+ },
+ )
+ self.validate_all(
+ "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)",
+ read={
"hive": "REGEXP_EXTRACT(subject, pattern)",
- "presto": "REGEXP_EXTRACT(subject, pattern)",
+ "spark2": "REGEXP_EXTRACT(subject, pattern)",
"spark": "REGEXP_EXTRACT(subject, pattern)",
+ "databricks": "REGEXP_EXTRACT(subject, pattern)",
},
write={
- "bigquery": "REGEXP_EXTRACT(subject, pattern)",
"hive": "REGEXP_EXTRACT(subject, pattern)",
- "presto": "REGEXP_EXTRACT(subject, pattern)",
- "snowflake": "REGEXP_SUBSTR(subject, pattern)",
+ "spark2": "REGEXP_EXTRACT(subject, pattern)",
"spark": "REGEXP_EXTRACT(subject, pattern)",
+ "databricks": "REGEXP_EXTRACT(subject, pattern)",
+ "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)",
},
)
self.validate_all(
@@ -1885,7 +2019,7 @@ STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""",
def test_swap(self):
ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake")
- assert isinstance(ast, exp.AlterTable)
+ assert isinstance(ast, exp.Alter)
assert isinstance(ast.args["actions"][0], exp.SwapTable)
def test_try_cast(self):
@@ -2005,6 +2139,16 @@ SINGLE = TRUE""",
self.validate_identity("SELECT t.$23:a.b", "SELECT GET_PATH(t.$23, 'a.b')")
self.validate_identity("SELECT t.$17:a[0].b[0].c", "SELECT GET_PATH(t.$17, 'a[0].b[0].c')")
+ self.validate_all(
+ """
+ SELECT col:"customer's department"
+ """,
+ write={
+ "snowflake": """SELECT GET_PATH(col, '["customer\\'s department"]')""",
+ "postgres": "SELECT JSON_EXTRACT_PATH(col, 'customer''s department')",
+ },
+ )
+
def test_alter_set_unset(self):
self.validate_identity("ALTER TABLE tbl SET DATA_RETENTION_TIME_IN_DAYS=1")
self.validate_identity("ALTER TABLE tbl SET DEFAULT_DDL_COLLATION='test'")
@@ -2021,3 +2165,32 @@ SINGLE = TRUE""",
self.validate_identity("ALTER TABLE foo UNSET TAG a, b, c")
self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING")
+
+ def test_from_changes(self):
+ self.validate_identity(
+ """SELECT C1 FROM t1 CHANGES (INFORMATION => APPEND_ONLY) AT (STREAM => 's1') END (TIMESTAMP => $ts2)"""
+ )
+ self.validate_identity(
+ """SELECT C1 FROM t1 CHANGES (INFORMATION => APPEND_ONLY) BEFORE (STATEMENT => 'STMT_ID') END (TIMESTAMP => $ts2)"""
+ )
+ self.validate_identity(
+ """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 00:00:00+00:00')) END (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 14:28:59.999999+00:00'))""",
+ """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => CAST('2024-07-01 00:00:00+00:00' AS TIMESTAMPTZ)) END (TIMESTAMP => CAST('2024-07-01 14:28:59.999999+00:00' AS TIMESTAMPTZ))""",
+ )
+
+ def test_grant(self):
+ grant_cmds = [
+ "GRANT SELECT ON FUTURE TABLES IN DATABASE d1 TO ROLE r1",
+ "GRANT INSERT, DELETE ON FUTURE TABLES IN SCHEMA d1.s1 TO ROLE r2",
+ "GRANT SELECT ON ALL TABLES IN SCHEMA mydb.myschema to ROLE analyst",
+ "GRANT SELECT, INSERT ON FUTURE TABLES IN SCHEMA mydb.myschema TO ROLE role1",
+ "GRANT CREATE MATERIALIZED VIEW ON SCHEMA mydb.myschema TO DATABASE ROLE mydb.dr1",
+ ]
+
+ for sql in grant_cmds:
+ with self.subTest(f"Testing Snowflake's GRANT command statement: {sql}"):
+ self.validate_identity(sql, check_command_warning=True)
+
+ self.validate_identity(
+ "GRANT ALL PRIVILEGES ON FUNCTION mydb.myschema.ADD5(number) TO ROLE analyst"
+ )
diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py
index bff91bf..4fed68c 100644
--- a/tests/dialects/test_spark.py
+++ b/tests/dialects/test_spark.py
@@ -10,6 +10,7 @@ class TestSpark(Validator):
dialect = "spark"
def test_ddl(self):
+ self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2")
self.validate_identity("CREATE TABLE foo AS WITH t AS (SELECT 1 AS col) SELECT col FROM t")
self.validate_identity("CREATE TEMPORARY VIEW test AS SELECT 1")
self.validate_identity("CREATE TABLE foo (col VARCHAR(50))")
@@ -129,6 +130,16 @@ TBLPROPERTIES (
"spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
},
)
+ self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo")
+ self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo")
+ self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed")
+ self.validate_identity(
+ "ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')"
+ )
+ self.validate_identity(
+ "ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')",
+ check_command_warning=True,
+ )
def test_to_date(self):
self.validate_all(
@@ -245,7 +256,7 @@ TBLPROPERTIES (
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)")
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)")
self.validate_identity("REFRESH TABLE a.b.c")
- self.validate_identity("INTERVAL -86 DAYS")
+ self.validate_identity("INTERVAL '-86' DAYS")
self.validate_identity("TRIM(' SparkSQL ')")
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
@@ -298,6 +309,20 @@ TBLPROPERTIES (
)
self.validate_all(
+ "SELECT ARRAY_AGG(x) FILTER (WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ write={
+ "duckdb": "SELECT ARRAY_AGG(x) FILTER(WHERE x = 5 AND NOT x IS NULL) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ "spark": "SELECT COLLECT_LIST(x) FILTER(WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
+ },
+ )
+ self.validate_all(
+ "SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday",
+ write={
+ "presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday",
+ "spark": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), 'EEEE') AS weekday",
+ },
+ )
+ self.validate_all(
"SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)",
read={
"databricks": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)",
@@ -467,7 +492,7 @@ TBLPROPERTIES (
)
self.validate_all(
"SELECT CAST(STRUCT('fooo') AS STRUCT<a: VARCHAR(2)>)",
- write={"spark": "SELECT CAST(STRUCT('fooo') AS STRUCT<a: STRING>)"},
+ write={"spark": "SELECT CAST(STRUCT('fooo' AS col1) AS STRUCT<a: STRING>)"},
)
self.validate_all(
"SELECT CAST(123456 AS VARCHAR(3))",
@@ -557,7 +582,10 @@ TBLPROPERTIES (
)
self.validate_all(
- "CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"}
+ "CAST(x AS TIMESTAMP)",
+ read={
+ "trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)",
+ },
)
self.validate_all(
"SELECT DATE_ADD(my_date_column, 1)",
@@ -684,6 +712,32 @@ TBLPROPERTIES (
write={
"spark": "SELECT DATE_ADD(MONTH, 20, col)",
"databricks": "SELECT DATE_ADD(MONTH, 20, col)",
+ "presto": "SELECT DATE_ADD('MONTH', 20, col)",
+ "trino": "SELECT DATE_ADD('MONTH', 20, col)",
+ },
+ )
+ self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')")
+
+ self.validate_all(
+ "SELECT ANY_VALUE(col, true), FIRST(col, true), FIRST_VALUE(col, true) OVER ()",
+ write={
+ "duckdb": "SELECT ANY_VALUE(col), FIRST(col), FIRST_VALUE(col IGNORE NULLS) OVER ()"
+ },
+ )
+
+ self.validate_all(
+ "SELECT STRUCT(1, 2)",
+ write={
+ "spark": "SELECT STRUCT(1 AS col1, 2 AS col2)",
+ "presto": "SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))",
+ "duckdb": "SELECT {'col1': 1, 'col2': 2}",
+ },
+ )
+ self.validate_all(
+ "SELECT STRUCT(x, 1, y AS col3, STRUCT(5)) FROM t",
+ write={
+ "spark": "SELECT STRUCT(x AS x, 1 AS col2, y AS col3, STRUCT(5 AS col1) AS col4) FROM t",
+ "duckdb": "SELECT {'x': x, 'col2': 1, 'col3': y, 'col4': {'col1': 5}} FROM t",
},
)
@@ -801,3 +855,24 @@ TBLPROPERTIES (
self.assertEqual(query.sql(name), with_modifiers)
else:
self.assertEqual(query.sql(name), without_modifiers)
+
+ def test_schema_binding_options(self):
+ for schema_binding in (
+ "BINDING",
+ "COMPENSATION",
+ "TYPE EVOLUTION",
+ "EVOLUTION",
+ ):
+ with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"):
+ self.validate_identity(
+ f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp"
+ )
+
+ def test_minus(self):
+ self.validate_all(
+ "SELECT * FROM db.table1 MINUS SELECT * FROM db.table2",
+ write={
+ "spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
+ "databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
+ },
+ )
diff --git a/tests/dialects/test_sqlite.py b/tests/dialects/test_sqlite.py
index 46bbadc..f2c9802 100644
--- a/tests/dialects/test_sqlite.py
+++ b/tests/dialects/test_sqlite.py
@@ -91,6 +91,10 @@ class TestSQLite(Validator):
read={"snowflake": "LEAST(x, y, z)"},
write={"snowflake": "LEAST(x, y, z)"},
)
+ self.validate_identity(
+ "SELECT * FROM station WHERE city IS NOT ''",
+ "SELECT * FROM station WHERE NOT city IS ''",
+ )
def test_strftime(self):
self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')")
diff --git a/tests/dialects/test_starrocks.py b/tests/dialects/test_starrocks.py
index 67aabb9..44c54a6 100644
--- a/tests/dialects/test_starrocks.py
+++ b/tests/dialects/test_starrocks.py
@@ -1,12 +1,41 @@
+from sqlglot.errors import UnsupportedError
from tests.dialects.test_dialect import Validator
class TestStarrocks(Validator):
dialect = "starrocks"
+ def test_ddl(self):
+ ddl_sqls = [
+ "DISTRIBUTED BY HASH (col1) BUCKETS 1",
+ "DISTRIBUTED BY HASH (col1)",
+ "DISTRIBUTED BY RANDOM BUCKETS 1",
+ "DISTRIBUTED BY RANDOM",
+ "DISTRIBUTED BY HASH (col1) ORDER BY (col1)",
+ "DISTRIBUTED BY HASH (col1) PROPERTIES ('replication_num'='1')",
+ "PRIMARY KEY (col1) DISTRIBUTED BY HASH (col1)",
+ "DUPLICATE KEY (col1, col2) DISTRIBUTED BY HASH (col1)",
+ ]
+
+ for properties in ddl_sqls:
+ with self.subTest(f"Testing create scheme: {properties}"):
+ self.validate_identity(f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) {properties}")
+ self.validate_identity(
+ f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) ENGINE=OLAP {properties}"
+ )
+
+ # Test the different wider DECIMAL types
+ self.validate_identity(
+ "CREATE TABLE foo (col0 DECIMAL(9, 1), col1 DECIMAL32(9, 1), col2 DECIMAL64(18, 10), col3 DECIMAL128(38, 10)) DISTRIBUTED BY HASH (col1) BUCKETS 1"
+ )
+
def test_identity(self):
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
self.validate_identity("SELECT APPROX_COUNT_DISTINCT(a) FROM x")
+ self.validate_identity("SELECT [1, 2, 3]")
+ self.validate_identity(
+ """SELECT CAST(PARSE_JSON(fieldvalue) -> '00000000-0000-0000-0000-00000000' AS VARCHAR) AS `code` FROM (SELECT '{"00000000-0000-0000-0000-00000000":"code01"}') AS t(fieldvalue)"""
+ )
def test_time(self):
self.validate_identity("TIMESTAMP('2022-01-01')")
@@ -28,3 +57,61 @@ class TestStarrocks(Validator):
"mysql": "SELECT REGEXP_LIKE(abc, '%foo%')",
},
)
+
+ def test_unnest(self):
+ self.validate_identity(
+ "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t",
+ "SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t(unnest)",
+ )
+ self.validate_all(
+ "SELECT student, score, unnest FROM tests CROSS JOIN LATERAL UNNEST(scores)",
+ write={
+ "spark": "SELECT student, score, unnest FROM tests LATERAL VIEW EXPLODE(scores) unnest AS unnest",
+ "starrocks": "SELECT student, score, unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS unnest(unnest)",
+ },
+ )
+ self.validate_all(
+ r"""SELECT * FROM UNNEST(array['John','Jane','Jim','Jamie'], array[24,25,26,27]) AS t(name, age)""",
+ write={
+ "postgres": "SELECT * FROM UNNEST(ARRAY['John', 'Jane', 'Jim', 'Jamie'], ARRAY[24, 25, 26, 27]) AS t(name, age)",
+ "spark": "SELECT * FROM INLINE(ARRAYS_ZIP(ARRAY('John', 'Jane', 'Jim', 'Jamie'), ARRAY(24, 25, 26, 27))) AS t(name, age)",
+ "starrocks": "SELECT * FROM UNNEST(['John', 'Jane', 'Jim', 'Jamie'], [24, 25, 26, 27]) AS t(name, age)",
+ },
+ )
+
+ # Use UNNEST to convert arrays into multiple columns
+ # see: https://docs.starrocks.io/docs/sql-reference/sql-functions/array-functions/unnest/
+ self.validate_all(
+ r"""SELECT id, t.type, t.scores FROM example_table, unnest(split(type, ";"), scores) AS t(type,scores)""",
+ write={
+ "postgres": "SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)",
+ "spark": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "databricks": r"""SELECT id, t.type, t.scores FROM example_table LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "starrocks": r"""SELECT id, t.type, t.scores FROM example_table, UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
+ "hive": UnsupportedError,
+ },
+ )
+
+ self.validate_all(
+ r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL unnest(split(type, ";"), scores) AS t(type,scores)""",
+ write={
+ "spark": r"""SELECT id, t.type, t.scores FROM example_table_2 LATERAL VIEW INLINE(ARRAYS_ZIP(SPLIT(type, CONCAT('\\Q', ';', '\\E')), scores)) t AS type, scores""",
+ "starrocks": r"""SELECT id, t.type, t.scores FROM example_table_2 CROSS JOIN LATERAL UNNEST(SPLIT(type, ';'), scores) AS t(type, scores)""",
+ "hive": UnsupportedError,
+ },
+ )
+
+ lateral_explode_sqls = [
+ "SELECT id, t.col FROM tbl, UNNEST(scores) AS t(col)",
+ "SELECT id, t.col FROM tbl CROSS JOIN LATERAL UNNEST(scores) AS t(col)",
+ ]
+
+ for sql in lateral_explode_sqls:
+ with self.subTest(f"Testing Starrocks roundtrip & transpilation of: {sql}"):
+ self.validate_all(
+ sql,
+ write={
+ "starrocks": sql,
+ "spark": "SELECT id, t.col FROM tbl LATERAL VIEW EXPLODE(scores) t AS col",
+ },
+ )
diff --git a/tests/dialects/test_teradata.py b/tests/dialects/test_teradata.py
index 74d5f88..466f5d5 100644
--- a/tests/dialects/test_teradata.py
+++ b/tests/dialects/test_teradata.py
@@ -1,3 +1,4 @@
+from sqlglot import exp
from tests.dialects.test_dialect import Validator
@@ -5,6 +6,13 @@ class TestTeradata(Validator):
dialect = "teradata"
def test_teradata(self):
+ self.validate_all(
+ "RANDOM(l, u)",
+ write={
+ "": "(u - l) * RAND() + l",
+ "teradata": "RANDOM(l, u)",
+ },
+ )
self.validate_identity("TO_NUMBER(expr, fmt, nlsparam)")
self.validate_identity("SELECT TOP 10 * FROM tbl")
self.validate_identity("SELECT * FROM tbl SAMPLE 5")
@@ -24,6 +32,10 @@ class TestTeradata(Validator):
},
)
+ self.validate_identity(
+ "RENAME TABLE emp TO employee", check_command_warning=True
+ ).assert_is(exp.Command)
+
def test_translate(self):
self.validate_all(
"TRANSLATE(x USING LATIN_TO_UNICODE)",
@@ -143,6 +155,15 @@ class TestTeradata(Validator):
"tsql": "CREATE TABLE a",
},
)
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS)"
+ )
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS (1, 2, 3))"
+ )
+ self.validate_identity(
+ "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD' COMPRESS (CAST('9999-09-09' AS DATE)), measurement INT)"
+ )
def test_insert(self):
self.validate_all(
@@ -212,6 +233,8 @@ class TestTeradata(Validator):
)
def test_time(self):
+ self.validate_identity("CAST(CURRENT_TIMESTAMP(6) AS TIMESTAMP WITH TIME ZONE)")
+
self.validate_all(
"CURRENT_TIMESTAMP",
read={
diff --git a/tests/dialects/test_trino.py b/tests/dialects/test_trino.py
index ccc1407..0ebe749 100644
--- a/tests/dialects/test_trino.py
+++ b/tests/dialects/test_trino.py
@@ -16,3 +16,35 @@ class TestTrino(Validator):
"SELECT TRIM('!foo!', '!')",
"SELECT TRIM('!' FROM '!foo!')",
)
+
+ def test_ddl(self):
+ self.validate_identity("ALTER TABLE users RENAME TO people")
+ self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people")
+ self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR")
+ self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR")
+ self.validate_identity("ALTER TABLE users DROP COLUMN zip")
+ self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip")
+ self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id")
+ self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id")
+ self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT")
+ self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL")
+ self.validate_identity(
+ "ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER TABLE people SET PROPERTIES x = DEFAULT", check_command_warning=True
+ )
+ self.validate_identity("ALTER VIEW people RENAME TO users")
+ self.validate_identity(
+ "ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True
+ )
diff --git a/tests/dialects/test_tsql.py b/tests/dialects/test_tsql.py
index 7455650..453cd5a 100644
--- a/tests/dialects/test_tsql.py
+++ b/tests/dialects/test_tsql.py
@@ -1,4 +1,4 @@
-from sqlglot import exp, parse
+from sqlglot import exp, parse, parse_one
from tests.dialects.test_dialect import Validator
from sqlglot.errors import ParseError
from sqlglot.optimizer.annotate_types import annotate_types
@@ -8,19 +8,14 @@ class TestTSQL(Validator):
dialect = "tsql"
def test_tsql(self):
- self.assertEqual(
- annotate_types(self.validate_identity("SELECT 1 WHERE EXISTS(SELECT 1)")).sql("tsql"),
- "SELECT 1 WHERE EXISTS(SELECT 1)",
- )
+ # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
+ # tsql allows .. which means use the default schema
+ self.validate_identity("SELECT * FROM a..b")
self.validate_identity("CREATE view a.b.c", "CREATE VIEW b.c")
self.validate_identity("DROP view a.b.c", "DROP VIEW b.c")
self.validate_identity("ROUND(x, 1, 0)")
self.validate_identity("EXEC MyProc @id=7, @name='Lochristi'", check_command_warning=True)
- # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
- # tsql allows .. which means use the default schema
- self.validate_identity("SELECT * FROM a..b")
-
self.validate_identity("SELECT TRIM(' test ') AS Result")
self.validate_identity("SELECT TRIM('.,! ' FROM ' # test .') AS Result")
self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
@@ -37,10 +32,36 @@ class TestTSQL(Validator):
self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
self.validate_identity("TRUNCATE TABLE t1 WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))")
self.validate_identity(
+ "SELECT TOP 10 s.RECORDID, n.c.value('(/*:FORM_ROOT/*:SOME_TAG)[1]', 'float') AS SOME_TAG_VALUE FROM source_table.dbo.source_data AS s(nolock) CROSS APPLY FormContent.nodes('/*:FORM_ROOT') AS N(C)"
+ )
+ self.validate_identity(
+ "CREATE CLUSTERED INDEX [IX_OfficeTagDetail_TagDetailID] ON [dbo].[OfficeTagDetail]([TagDetailID] ASC)"
+ )
+ self.validate_identity(
+ "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON X([y])"
+ )
+ self.validate_identity(
+ "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON PRIMARY"
+ )
+ self.validate_identity(
"COPY INTO test_1 FROM 'path' WITH (FORMAT_NAME = test, FILE_TYPE = 'CSV', CREDENTIAL = (IDENTITY='Shared Access Signature', SECRET='token'), FIELDTERMINATOR = ';', ROWTERMINATOR = '0X0A', ENCODING = 'UTF8', DATEFORMAT = 'ymd', MAXERRORS = 10, ERRORFILE = 'errorsfolder', IDENTITY_INSERT = 'ON')"
)
+ self.assertEqual(
+ annotate_types(self.validate_identity("SELECT 1 WHERE EXISTS(SELECT 1)")).sql("tsql"),
+ "SELECT 1 WHERE EXISTS(SELECT 1)",
+ )
self.validate_all(
+ "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
+ read={
+ "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH C AS (WITH A AS (SELECT 2 AS value) SELECT * FROM A) SELECT * FROM C",
+ "tsql": "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
+ },
+ write={
+ "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * FROM (SELECT * FROM C) AS temp",
+ },
+ )
+ self.validate_all(
"SELECT IIF(cond <> 0, 'True', 'False')",
read={
"spark": "SELECT IF(cond, 'True', 'False')",
@@ -383,6 +404,28 @@ class TestTSQL(Validator):
self.validate_identity("HASHBYTES('MD2', 'x')")
self.validate_identity("LOG(n, b)")
+ self.validate_all(
+ "STDEV(x)",
+ read={
+ "": "STDDEV(x)",
+ },
+ write={
+ "": "STDDEV(x)",
+ "tsql": "STDEV(x)",
+ },
+ )
+
+ # Check that TRUE and FALSE don't get expanded to (1=1) or (1=0) when used in a VALUES expression
+ self.validate_identity(
+ "SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)",
+ write_sql="SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)",
+ )
+ self.validate_identity("'a' + 'b'")
+ self.validate_identity(
+ "'a' || 'b'",
+ "'a' + 'b'",
+ )
+
def test_option(self):
possible_options = [
"HASH GROUP",
@@ -763,7 +806,7 @@ class TestTSQL(Validator):
self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x")
self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is(
- exp.AlterTable
+ exp.Alter
).args["actions"][0].assert_is(exp.Drop)
for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"):
@@ -778,6 +821,7 @@ class TestTSQL(Validator):
f"UNIQUE {clustered_keyword} ([internal_id] ASC))",
)
+ self.validate_identity("CREATE VIEW t AS WITH cte AS (SELECT 1 AS c) SELECT c FROM cte")
self.validate_identity(
"ALTER TABLE tbl SET SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS)"
)
@@ -792,6 +836,20 @@ class TestTSQL(Validator):
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=ON")
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=OFF")
+ self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
+ self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
+ self.validate_identity(
+ "ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
+ self.validate_identity(
+ "ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100",
+ check_command_warning=True,
+ )
self.validate_identity(
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7 END",
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7 END",
@@ -880,6 +938,14 @@ class TestTSQL(Validator):
},
)
+ for colstore in ("NONCLUSTERED COLUMNSTORE", "CLUSTERED COLUMNSTORE"):
+ self.validate_identity(f"CREATE {colstore} INDEX index_name ON foo.bar")
+
+ self.validate_identity(
+ "CREATE COLUMNSTORE INDEX index_name ON foo.bar",
+ "CREATE NONCLUSTERED COLUMNSTORE INDEX index_name ON foo.bar",
+ )
+
def test_insert_cte(self):
self.validate_all(
"INSERT INTO foo.bar WITH cte AS (SELECT 1 AS one) SELECT * FROM cte",
@@ -1108,6 +1174,11 @@ WHERE
self.validate_all("ISNULL(x, y)", write={"spark": "COALESCE(x, y)"})
def test_json(self):
+ self.validate_identity(
+ """JSON_QUERY(REPLACE(REPLACE(x , '''', '"'), '""', '"'))""",
+ """ISNULL(JSON_QUERY(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'), JSON_VALUE(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'))""",
+ )
+
self.validate_all(
"JSON_QUERY(r.JSON, '$.Attr_INT')",
write={
@@ -1470,6 +1541,15 @@ WHERE
},
)
+ # Check that superfluous casts aren't added. ref: https://github.com/TobikoData/sqlmesh/issues/2672
+ self.validate_all(
+ "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
+ write={
+ "tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
+ "clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo",
+ },
+ )
+
def test_lateral_subquery(self):
self.validate_all(
"SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)",
@@ -1511,8 +1591,8 @@ WHERE
self.validate_all(
"SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)",
write={
- "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)",
- "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)",
+ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z)",
+ "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x) AS y(z)",
},
)
@@ -1607,7 +1687,7 @@ WHERE
},
write={
"bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
- "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
+ "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))",
"mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -1622,11 +1702,11 @@ WHERE
"EOMONTH(GETDATE(), -1)",
write={
"bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))",
- "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
+ "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)",
"mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
- "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
+ "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', -1, CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
"redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))",
"snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))",
"spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))",
@@ -1868,3 +1948,60 @@ FROM OPENJSON(@json) WITH (
"DECLARE vendor_cursor CURSOR FOR SELECT VendorID, Name FROM Purchasing.Vendor WHERE PreferredVendorStatus = 1 ORDER BY VendorID",
check_command_warning=True,
)
+
+ def test_scope_resolution_op(self):
+ # we still want to support :: casting shorthand for tsql
+ self.validate_identity("x::int", "CAST(x AS INTEGER)")
+ self.validate_identity("x::varchar", "CAST(x AS VARCHAR)")
+ self.validate_identity("x::varchar(MAX)", "CAST(x AS VARCHAR(MAX))")
+
+ for lhs, rhs in (
+ ("", "FOO(a, b)"),
+ ("bar", "baZ(1, 2)"),
+ ("LOGIN", "EricKurjan"),
+ ("GEOGRAPHY", "Point(latitude, longitude, 4326)"),
+ (
+ "GEOGRAPHY",
+ "STGeomFromText('POLYGON((-122.358 47.653 , -122.348 47.649, -122.348 47.658, -122.358 47.658, -122.358 47.653))', 4326)",
+ ),
+ ):
+ with self.subTest(f"Scope resolution, LHS: {lhs}, RHS: {rhs}"):
+ expr = self.validate_identity(f"{lhs}::{rhs}")
+ base_sql = expr.sql()
+ self.assertEqual(base_sql, f"SCOPE_RESOLUTION({lhs + ', ' if lhs else ''}{rhs})")
+ self.assertEqual(parse_one(base_sql).sql("tsql"), f"{lhs}::{rhs}")
+
+ def test_count(self):
+ count = annotate_types(self.validate_identity("SELECT COUNT(1) FROM x"))
+ self.assertEqual(count.expressions[0].type.this, exp.DataType.Type.INT)
+
+ count_big = annotate_types(self.validate_identity("SELECT COUNT_BIG(1) FROM x"))
+ self.assertEqual(count_big.expressions[0].type.this, exp.DataType.Type.BIGINT)
+
+ self.validate_all(
+ "SELECT COUNT_BIG(1) FROM x",
+ read={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ },
+ write={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ "tsql": "SELECT COUNT_BIG(1) FROM x",
+ },
+ )
+ self.validate_all(
+ "SELECT COUNT(1) FROM x",
+ write={
+ "duckdb": "SELECT COUNT(1) FROM x",
+ "spark": "SELECT COUNT(1) FROM x",
+ "tsql": "SELECT COUNT(1) FROM x",
+ },
+ )
+
+ def test_grant(self):
+ self.validate_identity("GRANT EXECUTE ON TestProc TO User2")
+ self.validate_identity("GRANT EXECUTE ON TestProc TO TesterRole WITH GRANT OPTION")
+ self.validate_identity(
+ "GRANT EXECUTE ON TestProc TO User2 AS TesterRole", check_command_warning=True
+ )
diff --git a/tests/fixtures/identity.sql b/tests/fixtures/identity.sql
index 4dc4aa1..bed2502 100644
--- a/tests/fixtures/identity.sql
+++ b/tests/fixtures/identity.sql
@@ -53,6 +53,7 @@ x >> 1 | 1 & 1 ^ 1
x || y
x[ : ]
x[1 : ]
+x[ : 2]
x[1 : 2]
x[-4 : -1]
1 - -1
@@ -115,7 +116,7 @@ ARRAY(foo, time)
ARRAY(LENGTH(waiter_name) > 0)
ARRAY_CONTAINS(x, 1)
x.EXTRACT(1)
-EXTRACT(x FROM y)
+EXTRACT(X FROM y)
EXTRACT(DATE FROM y)
EXTRACT(WEEK(monday) FROM created_at)
CONCAT_WS('-', 'a', 'b')
@@ -130,7 +131,7 @@ x[ORDINAL(1)][SAFE_OFFSET(2)]
x GLOB '??-*'
x GLOB y
ILIKE(x, 'z')
-x LIKE SUBSTR('abc', 1, 1)
+x LIKE SUBSTRING('abc', 1, 1)
x LIKE y
x LIKE a.y
x LIKE '%y%'
@@ -203,6 +204,7 @@ USE ROLE x
USE WAREHOUSE x
USE DATABASE x
USE SCHEMA x.y
+USE CATALOG abc
NOT 1
NOT NOT 1
SELECT * FROM test
@@ -594,6 +596,7 @@ CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (CYCLE))
CREATE TABLE customer (period INT NOT NULL)
CREATE TABLE foo (baz_id INT REFERENCES baz (id) DEFERRABLE)
CREATE TABLE foo (baz CHAR(4) CHARACTER SET LATIN UPPERCASE NOT CASESPECIFIC COMPRESS 'a')
+CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS)
CREATE TABLE foo (baz DATE FORMAT 'YYYY/MM/DD' TITLE 'title' INLINE LENGTH 1 COMPRESS ('a', 'b'))
CREATE TABLE t (title TEXT)
CREATE TABLE foo (baz INT, inline TEXT)
@@ -733,6 +736,8 @@ SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z
SELECT ((SELECT 1) + 1)
SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES
SELECT CAST(x AS INT) /* comment */ FROM foo
+SELECT c /* c1 /* c2 */ c3 */
+SELECT c /* c1 /* c2 /* c3 */ */ */
SELECT c /* c1 */ AS alias /* c2 */
SELECT a /* x */, b /* x */
SELECT a /* x */ /* y */ /* z */, b /* k */ /* m */
@@ -772,17 +777,13 @@ ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN'), PARTITION(
ALTER TABLE mydataset.mytable DELETE WHERE x = 1
ALTER TABLE table1 RENAME COLUMN c1 TO c2
ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2
+ALTER TABLE table1 RENAME TO table2
+ALTER VIEW view1 AS SELECT a, b, c FROM table1
+ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2
+ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 LIMIT 100
SELECT div.a FROM test_table AS div
WITH view AS (SELECT 1 AS x) SELECT * FROM view
ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>>
-ARRAY<INT>[1, 2, 3]
-ARRAY<INT>[]
-STRUCT<x VARCHAR(10)>
-STRUCT<x VARCHAR(10)>("bla")
-STRUCT<VARCHAR(10)>("bla")
-STRUCT<INT>(5)
-STRUCT<DATE>("2011-05-05")
-STRUCT<x INT, y TEXT>(1, t.str_col)
STRUCT<int INT>
SELECT CAST(NULL AS ARRAY<INT>) IS NULL AS array_is_null
ALTER TABLE "schema"."tablename" ADD CONSTRAINT "CHK_Name" CHECK (NOT "IdDwh" IS NULL AND "IdDwh" <> (0))
@@ -872,4 +873,14 @@ SELECT name
SELECT copy
SELECT rollup
SELECT unnest
+SELECT cube, cube.x FROM cube
SELECT * FROM a STRAIGHT_JOIN b
+SELECT COUNT(DISTINCT "foo bar") FROM (SELECT 1 AS "foo bar") AS t
+SELECT vector
+WITH all AS (SELECT 1 AS count) SELECT all.count FROM all
+SELECT rename
+GRANT SELECT ON TABLE tbl TO user
+GRANT SELECT, INSERT ON FUNCTION tbl TO user
+GRANT SELECT ON orders TO ROLE PUBLIC
+GRANT SELECT ON nation TO alice WITH GRANT OPTION
+GRANT DELETE ON SCHEMA finance TO bob \ No newline at end of file
diff --git a/tests/fixtures/optimizer/annotate_types.sql b/tests/fixtures/optimizer/annotate_types.sql
index e781765..f608851 100644
--- a/tests/fixtures/optimizer/annotate_types.sql
+++ b/tests/fixtures/optimizer/annotate_types.sql
@@ -1,13 +1,25 @@
5;
INT;
+-5;
+INT;
+
+~5;
+INT;
+
+(5);
+INT;
+
5.3;
DOUBLE;
'bla';
VARCHAR;
-True;
+true;
+bool;
+
+not true;
bool;
false;
@@ -15,6 +27,13 @@ bool;
null;
null;
+
+null and false;
+bool;
+
+null + 1;
+int;
+
CASE WHEN x THEN NULL ELSE 1 END;
INT;
@@ -55,3 +74,15 @@ STRUCT<a INT, b DOUBLE>;
# dialect: presto
ROW(1, 2.5, 'foo');
STRUCT<INT, DOUBLE, VARCHAR>;
+
+# dialect: bigquery
+EXTRACT(date from x);
+DATE;
+
+# dialect: bigquery
+EXTRACT(time from x);
+TIME;
+
+# dialect: bigquery
+EXTRACT(day from x);
+INT;
diff --git a/tests/fixtures/optimizer/canonicalize.sql b/tests/fixtures/optimizer/canonicalize.sql
index e4c78b7..66c6c95 100644
--- a/tests/fixtures/optimizer/canonicalize.sql
+++ b/tests/fixtures/optimizer/canonicalize.sql
@@ -52,6 +52,10 @@ SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE CASE WHEN COALESCE("x"."b" <> 0, 1 <
DATE('2023-01-01');
CAST('2023-01-01' AS DATE);
+-- Some dialects only allow dates
+DATE('2023-01-01 00:00:00');
+DATE('2023-01-01 00:00:00');
+
TIMESTAMP('2023-01-01');
CAST('2023-01-01' AS TIMESTAMP);
diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql
index 37ef4fd..76fc16d 100644
--- a/tests/fixtures/optimizer/optimizer.sql
+++ b/tests/fixtures/optimizer/optimizer.sql
@@ -547,7 +547,8 @@ FROM (
"tb"."b" AS "b",
"tb"."c" AS "c"
FROM "sc"."tb" AS "tb"
-) AS "_q_0" PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1";
+) AS "_q_0"
+PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1";
# title: pivoted source with explicit selections where one of them is excluded & selected at the same time
# note: we need to respect the exclude when selecting * from pivoted source and not include the computed column twice
@@ -564,7 +565,8 @@ FROM (
"tb"."b" AS "b",
"tb"."c" AS "c"
FROM "sc"."tb" AS "tb"
-) AS "_q_0" PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1";
+) AS "_q_0"
+PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1";
# title: pivoted source with implicit selections
# execute: false
@@ -579,7 +581,8 @@ FROM (
"u"."g" AS "g",
"u"."h" AS "h"
FROM "u" AS "u"
-) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1";
+) AS "_q_0"
+PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1";
# title: selecting explicit qualified columns from pivoted source with explicit selections
# execute: false
@@ -592,7 +595,8 @@ FROM (
"u"."f" AS "f",
"u"."h" AS "h"
FROM "u" AS "u"
-) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv";
+) AS "_q_0"
+PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv";
# title: selecting explicit unqualified columns from pivoted source with implicit selections
# execute: false
@@ -600,7 +604,8 @@ SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y'));
SELECT
"_q_0"."x" AS "x",
"_q_0"."y" AS "y"
-FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0";
+FROM "u" AS "u"
+PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0";
# title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery
# execute: false
@@ -617,7 +622,8 @@ SELECT
`_q_0`.`g` AS `g`,
`_q_0`.`sum_x` AS `sum_x`,
`_q_0`.`sum_y` AS `sum_y`
-FROM `u_cte` AS `u_cte` PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`;
+FROM `u_cte` AS `u_cte`
+PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`;
# title: selecting all columns from a pivoted source and generating snowflake
# execute: false
@@ -627,7 +633,8 @@ SELECT
"_q_0"."G" AS "G",
"_q_0"."'x'" AS "'x'",
"_q_0"."'y'" AS "'y'"
-FROM "U" AS "U" PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0";
+FROM "U" AS "U"
+PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0";
# title: selecting all columns from a pivoted source and generating spark
# note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`)
@@ -641,7 +648,8 @@ SELECT
FROM (
SELECT
*
- FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y'))
+ FROM `u` AS `u`
+ PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y'))
) AS `_q_0`;
# title: selecting all columns from a pivoted source, pivot has column aliases
@@ -674,7 +682,8 @@ WITH "SOURCE" AS (
SELECT
"FINAL"."ID" AS "ID",
"FINAL"."TIMESTAMP_1" AS "TIMESTAMP_1"
-FROM "SOURCE" AS "SOURCE" PIVOT(MAX("SOURCE"."VALUE") FOR "SOURCE"."KEY" IN ('a', 'b', 'c')) AS "FINAL"("ID", "TIMESTAMP_1", "TIMESTAMP_2", "COL_1", "COL_2", "COL_3");
+FROM "SOURCE" AS "SOURCE"
+PIVOT(MAX("SOURCE"."VALUE") FOR "SOURCE"."KEY" IN ('a', 'b', 'c')) AS "FINAL"("ID", "TIMESTAMP_1", "TIMESTAMP_2", "COL_1", "COL_2", "COL_3");
# title: unpivoted table source with a single value column, unpivot columns can't be qualified
# execute: false
@@ -685,7 +694,8 @@ SELECT
"_q_0"."DEPT" AS "DEPT",
"_q_0"."MONTH" AS "MONTH",
"_q_0"."SALES" AS "SALES"
-FROM "M_SALES" AS "M_SALES"("EMPID", "DEPT", "JAN", "FEB") UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
+FROM "M_SALES" AS "M_SALES"("EMPID", "DEPT", "JAN", "FEB")
+UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
ORDER BY
"_q_0"."EMPID";
@@ -704,7 +714,8 @@ FROM (
"m_sales"."jan" AS "jan",
"m_sales"."feb" AS "feb"
FROM "m_sales" AS "m_sales"
-) AS "m_sales" UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a", "b", "c", "d");
+) AS "m_sales"
+UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a", "b", "c", "d");
# title: unpivoted derived table source with a single value column
# execute: false
@@ -722,20 +733,22 @@ FROM (
"M_SALES"."JAN" AS "JAN",
"M_SALES"."FEB" AS "FEB"
FROM "M_SALES" AS "M_SALES"
-) AS "M_SALES" UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
+) AS "M_SALES"
+UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
ORDER BY
"_q_0"."EMPID";
# title: unpivoted table source with a single value column, unpivot columns can be qualified
# execute: false
# dialect: bigquery
-# note: the named columns aren't supported by BQ but we add them here to avoid defining a schema
+# note: the named columns are not supported by BQ but we add them here to avoid defining a schema
SELECT * FROM produce AS produce(product, q1, q2, q3, q4) UNPIVOT(sales FOR quarter IN (q1, q2, q3, q4));
SELECT
`_q_0`.`product` AS `product`,
`_q_0`.`quarter` AS `quarter`,
`_q_0`.`sales` AS `sales`
-FROM `produce` AS `produce` UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`, `produce`.`q3`, `produce`.`q4`)) AS `_q_0`;
+FROM `produce` AS `produce`
+UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`, `produce`.`q3`, `produce`.`q4`)) AS `_q_0`;
# title: unpivoted table source with multiple value columns
# execute: false
@@ -746,7 +759,8 @@ SELECT
`_q_0`.`semesters` AS `semesters`,
`_q_0`.`first_half_sales` AS `first_half_sales`,
`_q_0`.`second_half_sales` AS `second_half_sales`
-FROM `produce` AS `produce` UNPIVOT((`first_half_sales`, `second_half_sales`) FOR `semesters` IN ((`produce`.`q1`, `produce`.`q2`) AS 'semester_1', (`produce`.`q3`, `produce`.`q4`) AS 'semester_2')) AS `_q_0`;
+FROM `produce` AS `produce`
+UNPIVOT((`first_half_sales`, `second_half_sales`) FOR `semesters` IN ((`produce`.`q1`, `produce`.`q2`) AS 'semester_1', (`produce`.`q3`, `produce`.`q4`) AS 'semester_2')) AS `_q_0`;
# title: quoting is preserved
# dialect: snowflake
@@ -824,7 +838,7 @@ SELECT
FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `TOp_TeRmS`
WHERE
`TOp_TeRmS`.`rank` = 1
- AND `TOp_TeRmS`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK)
+ AND `TOp_TeRmS`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL '2' WEEK)
GROUP BY
`day`,
`top_term`,
diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql
index ea96fe5..68c0caa 100644
--- a/tests/fixtures/optimizer/qualify_columns.sql
+++ b/tests/fixtures/optimizer/qualify_columns.sql
@@ -343,6 +343,11 @@ WITH tbl1 AS (SELECT STRUCT(1 AS col1, Struct(5 AS col1)) AS col) SELECT tbl1.co
WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1;
WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1 AS tbl1;
+# title: CSV files are not scanned by default
+# execute: false
+SELECT * FROM READ_CSV('file.csv');
+SELECT * FROM READ_CSV('file.csv') AS _q_0;
+
--------------------------------------
-- CTEs
--------------------------------------
@@ -385,14 +390,14 @@ WITH player AS (SELECT player.name, player.asset.info FROM players) SELECT * FRO
WITH player AS (SELECT players.player.name AS name, players.player.asset.info AS info FROM players AS players) SELECT player.name AS name, player.info AS info FROM player AS player;
--------------------------------------
--- Except and Replace
+-- Except, Replace, Rename
--------------------------------------
# execute: false
-SELECT * REPLACE(a AS d) FROM x;
+SELECT * RENAME(a AS d) FROM x;
SELECT x.a AS d, x.b AS b FROM x AS x;
# execute: false
-SELECT * EXCEPT(b) REPLACE(a AS d) FROM x;
+SELECT * EXCEPT(b) RENAME(a AS d) FROM x;
SELECT x.a AS d FROM x AS x;
SELECT x.* EXCEPT(a), y.* FROM x, y;
@@ -416,6 +421,30 @@ SELECT x.a AS a, x.b AS b, y.b AS b FROM x AS x LEFT JOIN x AS y ON x.a = y.a;
SELECT COALESCE(CAST(t1.a AS VARCHAR), '') AS a, t2.* EXCEPT (a) FROM x AS t1, x AS t2;
SELECT COALESCE(CAST(t1.a AS VARCHAR), '') AS a, t2.b AS b FROM x AS t1, x AS t2;
+# execute: false
+SELECT * REPLACE(2 AS a) FROM x;
+SELECT 2 AS a, x.b AS b FROM x AS x;
+
+# execute: false
+SELECT * EXCEPT (a, b) REPLACE (a AS a) FROM x;
+SELECT * EXCEPT (a, b) REPLACE (x.a AS a) FROM x AS x;
+
+# execute: false
+SELECT * REPLACE(COALESCE(b, a) AS a, a as b) FROM x;
+SELECT COALESCE(x.b, x.a) AS a, x.a AS b FROM x AS x;
+
+# execute: false
+SELECT * REPLACE(1 AS a) RENAME(b as alias_b) FROM x;
+SELECT 1 AS a, x.b AS alias_b FROM x AS x;
+
+# execute: false
+SELECT * EXCEPT(a) REPLACE(COALESCE(a, b) AS b) RENAME(b AS new_b) FROM x;
+SELECT COALESCE(x.a, x.b) AS new_b FROM x AS x;
+
+# execute: false
+SELECT * REPLACE(1 AS a, a AS b) RENAME(b AS new_b) FROM x;
+SELECT 1 AS a, x.a AS new_b FROM x AS x;
+
--------------------------------------
-- Using
--------------------------------------
@@ -480,6 +509,18 @@ SELECT COALESCE(t1.a, t2.a) AS a FROM t1 AS t1 JOIN t2 AS t2 ON t1.a = t2.a;
WITH m(a) AS (SELECT 1), n(b) AS (SELECT 1) SELECT * FROM m JOIN n AS foo(a) USING (a);
WITH m AS (SELECT 1 AS a), n AS (SELECT 1 AS b) SELECT COALESCE(m.a, foo.a) AS a FROM m AS m JOIN n AS foo(a) ON m.a = foo.a;
+# title: coalesce the USING clause's columns (3 joins, 2 join columns)
+WITH t1 AS (SELECT 'x' AS id, DATE '2024-01-01' AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, DATE '2024-02-02' AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, DATE '2024-02-02' AS foo, 456 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING(id, foo) FULL OUTER JOIN t3 USING(id, foo);
+WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT COALESCE(t1.id, t2.id, t3.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo) AS foo, t1.value AS value, t2.value AS value, t3.value AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo;
+
+# title: coalesce the USING clause's columns (3 joins, 3 join columns)
+WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING (id, foo, value) FULL OUTER JOIN t3 USING (id, foo, value);
+WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT COALESCE(t1.id, t2.id, t3.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo) AS foo, COALESCE(t1.value, t2.value, t3.value) AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo AND t1.value = t2.value FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo AND COALESCE(t1.value, t2.value) = t3.value;
+
+# title: coalesce the USING clause's columns (4 joins, 2 join columns)
+WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value), t4 AS (SELECT 'x' AS id, CAST('2024-03-03' AS DATE) AS foo, 789 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING (id, foo) FULL OUTER JOIN t3 USING (id, foo) FULL OUTER JOIN t4 USING (id, foo);
+WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value), t4 AS (SELECT 'x' AS id, CAST('2024-03-03' AS DATE) AS foo, 789 AS value) SELECT COALESCE(t1.id, t2.id, t3.id, t4.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo, t4.foo) AS foo, t1.value AS value, t2.value AS value, t3.value AS value, t4.value AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo FULL OUTER JOIN t4 AS t4 ON COALESCE(t1.id, t2.id, t3.id) = t4.id AND COALESCE(t1.foo, t2.foo, t3.foo) = t4.foo;
+
--------------------------------------
-- Hint with table reference
--------------------------------------
@@ -591,6 +632,7 @@ SELECT x.a + 1 AS i, x.a + 1 + 1 AS j, x.a + 1 + 1 + 1 AS k FROM x AS x;
# title: noop - reference comes before alias
# execute: false
+# validate_qualify_columns: false
SELECT i + 1 AS j, x.a + 1 AS i FROM x;
SELECT i + 1 AS j, x.a + 1 AS i FROM x AS x;
@@ -619,6 +661,16 @@ SELECT x.a + x.b AS f, x.a + x.b AS _col_1, x.a + x.b + 5 AS _col_2 FROM x AS x;
SELECT a, SUM(b) AS c, SUM(c) OVER(PARTITION BY a) AS d from x group by 1 ORDER BY a;
SELECT x.a AS a, SUM(x.b) AS c, SUM(SUM(x.b)) OVER (PARTITION BY x.a) AS d FROM x AS x GROUP BY x.a ORDER BY a;
+# title: we can't expand aliases corresponding to recursive CTE columns (CTE names output columns)
+# execute: false
+WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t;
+WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t;
+
+# title: we can't expand aliases corresponding to recursive CTE columns (CTE doesn't name output columns)
+# execute: false
+WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t;
+WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t;
+
--------------------------------------
-- Wrapped tables / join constructs
--------------------------------------
@@ -653,3 +705,6 @@ SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a
SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b);
SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b);
+
+SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS);
+SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS);
diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql
index 30bf834..61d0b96 100644
--- a/tests/fixtures/optimizer/qualify_tables.sql
+++ b/tests/fixtures/optimizer/qualify_tables.sql
@@ -177,3 +177,10 @@ SELECT a.id, a_2.id FROM c.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id;
SELECT cat.db1.a.id, db2.a.id FROM cat.db1.a JOIN db2.a ON cat.db1.a.id = db2.a.id;
SELECT a.id, a_2.id FROM cat.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id;
+
+COPY INTO (SELECT * FROM x) TO 'data' WITH (FORMAT 'CSV');
+COPY INTO (SELECT * FROM c.db.x AS x) TO 'data' WITH (FORMAT 'CSV');
+
+# title: tablesample
+SELECT 1 FROM x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN y TABLESAMPLE SYSTEM (10 PERCENT);
+SELECT 1 FROM c.db.x AS x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN c.db.y AS y TABLESAMPLE SYSTEM (10 PERCENT);
diff --git a/tests/fixtures/optimizer/simplify.sql b/tests/fixtures/optimizer/simplify.sql
index 6035ee6..fa2dc79 100644
--- a/tests/fixtures/optimizer/simplify.sql
+++ b/tests/fixtures/optimizer/simplify.sql
@@ -85,6 +85,27 @@ NULL;
NULL = NULL;
NULL;
+1 AND 0;
+FALSE;
+
+0 AND 1;
+FALSE;
+
+0 OR 1;
+TRUE;
+
+0 OR NULL;
+NULL;
+
+NULL OR 0;
+NULL;
+
+0 AND NULL;
+FALSE;
+
+NULL AND 0;
+FALSE;
+
-- Can't optimize this because different engines do different things
-- mysql converts to 0 and 1 but tsql does true and false
NULL <=> NULL;
@@ -116,6 +137,9 @@ FALSE;
TRUE AND TRUE OR TRUE AND FALSE;
TRUE;
+COALESCE(x, y) <> ALL (SELECT z FROM w);
+COALESCE(x, y) <> ALL (SELECT z FROM w);
+
--------------------------------------
-- Absorption
--------------------------------------
@@ -531,6 +555,9 @@ CAST('2023-01-01 22:00:00' AS DATETIME);
DATE_ADD(x, 1, 'MONTH');
DATE_ADD(x, 1, 'MONTH');
+DATE_ADD(x, 1);
+DATE_ADD(x, 1, 'DAY');
+
--------------------------------------
-- Comparisons
--------------------------------------
diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
index 5b004fa..290d276 100644
--- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
+++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql
@@ -852,10 +852,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
-ROLLUP (
- "x"."channel",
- "x"."id"
-)
+ ROLLUP (
+ "x"."channel",
+ "x"."id"
+ )
ORDER BY
"channel",
"id"
@@ -991,9 +991,9 @@ FROM store_sales,
date_dim,
store,
(SELECT ca_zip
- FROM (SELECT Substr(ca_zip, 1, 5) ca_zip
+ FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip
FROM customer_address
- WHERE Substr(ca_zip, 1, 5) IN ( '67436', '26121', '38443',
+ WHERE SUBSTRING(ca_zip, 1, 5) IN ( '67436', '26121', '38443',
'63157',
'68856', '19485', '86425',
'26741',
@@ -1195,7 +1195,7 @@ FROM store_sales,
'92564' )
INTERSECT
SELECT ca_zip
- FROM (SELECT Substr(ca_zip, 1, 5) ca_zip,
+ FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip,
Count(*) cnt
FROM customer_address,
customer
@@ -1207,13 +1207,13 @@ WHERE ss_store_sk = s_store_sk
AND ss_sold_date_sk = d_date_sk
AND d_qoy = 2
AND d_year = 2000
- AND ( Substr(s_zip, 1, 2) = Substr(V1.ca_zip, 1, 2) )
+ AND ( SUBSTRING(s_zip, 1, 2) = SUBSTRING(V1.ca_zip, 1, 2) )
GROUP BY s_store_name
ORDER BY s_store_name
LIMIT 100;
WITH "a1" AS (
SELECT
- SUBSTR("customer_address"."ca_zip", 1, 5) AS "ca_zip"
+ SUBSTRING("customer_address"."ca_zip", 1, 5) AS "ca_zip"
FROM "customer_address" AS "customer_address"
JOIN "customer" AS "customer"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
@@ -1224,10 +1224,10 @@ WITH "a1" AS (
COUNT(*) > 10
), "a2" AS (
SELECT
- SUBSTR("customer_address"."ca_zip", 1, 5) AS "ca_zip"
+ SUBSTRING("customer_address"."ca_zip", 1, 5) AS "ca_zip"
FROM "customer_address" AS "customer_address"
WHERE
- SUBSTR("customer_address"."ca_zip", 1, 5) IN ('67436', '26121', '38443', '63157', '68856', '19485', '86425', '26741', '70991', '60899', '63573', '47556', '56193', '93314', '87827', '62017', '85067', '95390', '48091', '10261', '81845', '41790', '42853', '24675', '12840', '60065', '84430', '57451', '24021', '91735', '75335', '71935', '34482', '56943', '70695', '52147', '56251', '28411', '86653', '23005', '22478', '29031', '34398', '15365', '42460', '33337', '59433', '73943', '72477', '74081', '74430', '64605', '39006', '11226', '49057', '97308', '42663', '18187', '19768', '43454', '32147', '76637', '51975', '11181', '45630', '33129', '45995', '64386', '55522', '26697', '20963', '35154', '64587', '49752', '66386', '30586', '59286', '13177', '66646', '84195', '74316', '36853', '32927', '12469', '11904', '36269', '17724', '55346', '12595', '53988', '65439', '28015', '63268', '73590', '29216', '82575', '69267', '13805', '91678', '79460', '94152', '14961', '15419', '48277', '62588', '55493', '28360', '14152', '55225', '18007', '53705', '56573', '80245', '71769', '57348', '36845', '13039', '17270', '22363', '83474', '25294', '43269', '77666', '15488', '99146', '64441', '43338', '38736', '62754', '48556', '86057', '23090', '38114', '66061', '18910', '84385', '23600', '19975', '27883', '65719', '19933', '32085', '49731', '40473', '27190', '46192', '23949', '44738', '12436', '64794', '68741', '15333', '24282', '49085', '31844', '71156', '48441', '17100', '98207', '44982', '20277', '71496', '96299', '37583', '22206', '89174', '30589', '61924', '53079', '10976', '13104', '42794', '54772', '15809', '56434', '39975', '13874', '30753', '77598', '78229', '59478', '12345', '55547', '57422', '42600', '79444', '29074', '29752', '21676', '32096', '43044', '39383', '37296', '36295', '63077', '16572', '31275', '18701', '40197', '48242', '27219', '49865', '84175', '30446', '25165', '13807', '72142', '70499', '70464', '71429', '18111', '70857', '29545', '36425', '52706', '36194', '42963', '75068', '47921', '74763', '90990', '89456', '62073', '88397', '73963', '75885', '62657', '12530', '81146', '57434', '25099', '41429', '98441', '48713', '52552', '31667', '14072', '13903', '44709', '85429', '58017', '38295', '44875', '73541', '30091', '12707', '23762', '62258', '33247', '78722', '77431', '14510', '35656', '72428', '92082', '35267', '43759', '24354', '90952', '11512', '21242', '22579', '56114', '32339', '52282', '41791', '24484', '95020', '28408', '99710', '11899', '43344', '72915', '27644', '62708', '74479', '17177', '32619', '12351', '91339', '31169', '57081', '53522', '16712', '34419', '71779', '44187', '46206', '96099', '61910', '53664', '12295', '31837', '33096', '10813', '63048', '31732', '79118', '73084', '72783', '84952', '46965', '77956', '39815', '32311', '75329', '48156', '30826', '49661', '13736', '92076', '74865', '88149', '92397', '52777', '68453', '32012', '21222', '52721', '24626', '18210', '42177', '91791', '75251', '82075', '44372', '45542', '20609', '60115', '17362', '22750', '90434', '31852', '54071', '33762', '14705', '40718', '56433', '30996', '40657', '49056', '23585', '66455', '41021', '74736', '72151', '37007', '21729', '60177', '84558', '59027', '93855', '60022', '86443', '19541', '86886', '30532', '39062', '48532', '34713', '52077', '22564', '64638', '15273', '31677', '36138', '62367', '60261', '80213', '42818', '25113', '72378', '69802', '69096', '55443', '28820', '13848', '78258', '37490', '30556', '77380', '28447', '44550', '26791', '70609', '82182', '33306', '43224', '22322', 
'86959', '68519', '14308', '46501', '81131', '34056', '61991', '19896', '87804', '65774', '92564')
+ SUBSTRING("customer_address"."ca_zip", 1, 5) IN ('67436', '26121', '38443', '63157', '68856', '19485', '86425', '26741', '70991', '60899', '63573', '47556', '56193', '93314', '87827', '62017', '85067', '95390', '48091', '10261', '81845', '41790', '42853', '24675', '12840', '60065', '84430', '57451', '24021', '91735', '75335', '71935', '34482', '56943', '70695', '52147', '56251', '28411', '86653', '23005', '22478', '29031', '34398', '15365', '42460', '33337', '59433', '73943', '72477', '74081', '74430', '64605', '39006', '11226', '49057', '97308', '42663', '18187', '19768', '43454', '32147', '76637', '51975', '11181', '45630', '33129', '45995', '64386', '55522', '26697', '20963', '35154', '64587', '49752', '66386', '30586', '59286', '13177', '66646', '84195', '74316', '36853', '32927', '12469', '11904', '36269', '17724', '55346', '12595', '53988', '65439', '28015', '63268', '73590', '29216', '82575', '69267', '13805', '91678', '79460', '94152', '14961', '15419', '48277', '62588', '55493', '28360', '14152', '55225', '18007', '53705', '56573', '80245', '71769', '57348', '36845', '13039', '17270', '22363', '83474', '25294', '43269', '77666', '15488', '99146', '64441', '43338', '38736', '62754', '48556', '86057', '23090', '38114', '66061', '18910', '84385', '23600', '19975', '27883', '65719', '19933', '32085', '49731', '40473', '27190', '46192', '23949', '44738', '12436', '64794', '68741', '15333', '24282', '49085', '31844', '71156', '48441', '17100', '98207', '44982', '20277', '71496', '96299', '37583', '22206', '89174', '30589', '61924', '53079', '10976', '13104', '42794', '54772', '15809', '56434', '39975', '13874', '30753', '77598', '78229', '59478', '12345', '55547', '57422', '42600', '79444', '29074', '29752', '21676', '32096', '43044', '39383', '37296', '36295', '63077', '16572', '31275', '18701', '40197', '48242', '27219', '49865', '84175', '30446', '25165', '13807', '72142', '70499', '70464', '71429', '18111', '70857', '29545', '36425', '52706', '36194', '42963', '75068', '47921', '74763', '90990', '89456', '62073', '88397', '73963', '75885', '62657', '12530', '81146', '57434', '25099', '41429', '98441', '48713', '52552', '31667', '14072', '13903', '44709', '85429', '58017', '38295', '44875', '73541', '30091', '12707', '23762', '62258', '33247', '78722', '77431', '14510', '35656', '72428', '92082', '35267', '43759', '24354', '90952', '11512', '21242', '22579', '56114', '32339', '52282', '41791', '24484', '95020', '28408', '99710', '11899', '43344', '72915', '27644', '62708', '74479', '17177', '32619', '12351', '91339', '31169', '57081', '53522', '16712', '34419', '71779', '44187', '46206', '96099', '61910', '53664', '12295', '31837', '33096', '10813', '63048', '31732', '79118', '73084', '72783', '84952', '46965', '77956', '39815', '32311', '75329', '48156', '30826', '49661', '13736', '92076', '74865', '88149', '92397', '52777', '68453', '32012', '21222', '52721', '24626', '18210', '42177', '91791', '75251', '82075', '44372', '45542', '20609', '60115', '17362', '22750', '90434', '31852', '54071', '33762', '14705', '40718', '56433', '30996', '40657', '49056', '23585', '66455', '41021', '74736', '72151', '37007', '21729', '60177', '84558', '59027', '93855', '60022', '86443', '19541', '86886', '30532', '39062', '48532', '34713', '52077', '22564', '64638', '15273', '31677', '36138', '62367', '60261', '80213', '42818', '25113', '72378', '69802', '69096', '55443', '28820', '13848', '78258', '37490', '30556', '77380', '28447', '44550', '26791', '70609', '82182', '33306', '43224', '22322', 
'86959', '68519', '14308', '46501', '81131', '34056', '61991', '19896', '87804', '65774', '92564')
INTERSECT
SELECT
"a1"."ca_zip" AS "ca_zip"
@@ -1244,7 +1244,7 @@ JOIN "date_dim" AS "date_dim"
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
JOIN "a2" AS "a2"
- ON SUBSTR("a2"."ca_zip", 1, 2) = SUBSTR("store"."s_zip", 1, 2)
+ ON SUBSTRING("a2"."ca_zip", 1, 2) = SUBSTRING("store"."s_zip", 1, 2)
GROUP BY
"store"."s_store_name"
ORDER BY
@@ -2294,12 +2294,12 @@ SELECT
SUM("y"."number_sales") AS "_col_5"
FROM "y" AS "y"
GROUP BY
-ROLLUP (
- "y"."channel",
- "y"."i_brand_id",
- "y"."i_class_id",
- "y"."i_category_id"
-)
+ ROLLUP (
+ "y"."channel",
+ "y"."i_brand_id",
+ "y"."i_class_id",
+ "y"."i_category_id"
+ )
ORDER BY
"channel",
"i_brand_id",
@@ -2319,7 +2319,7 @@ FROM catalog_sales,
date_dim
WHERE cs_bill_customer_sk = c_customer_sk
AND c_current_addr_sk = ca_address_sk
- AND ( Substr(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405',
+ AND ( SUBSTRING(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405',
'86475', '85392', '85460', '80348',
'81792' )
OR ca_state IN ( 'CA', 'WA', 'GA' )
@@ -2344,7 +2344,7 @@ JOIN "customer_address" AS "customer_address"
ON (
"catalog_sales"."cs_sales_price" > 500
OR "customer_address"."ca_state" IN ('CA', 'WA', 'GA')
- OR SUBSTR("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
+ OR SUBSTRING("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
)
AND "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
GROUP BY
@@ -2608,12 +2608,12 @@ JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
AND "customer_address"."ca_state" IN ('KS', 'IA', 'AL', 'UT', 'VA', 'NC', 'TX')
GROUP BY
-ROLLUP (
- "item"."i_item_id",
- "customer_address"."ca_country",
- "customer_address"."ca_state",
- "customer_address"."ca_county"
-)
+ ROLLUP (
+ "item"."i_item_id",
+ "customer_address"."ca_country",
+ "customer_address"."ca_state",
+ "customer_address"."ca_county"
+ )
ORDER BY
"ca_country",
"ca_state",
@@ -2643,7 +2643,7 @@ WHERE d_date_sk = ss_sold_date_sk
AND d_year = 1998
AND ss_customer_sk = c_customer_sk
AND c_current_addr_sk = ca_address_sk
- AND Substr(ca_zip, 1, 5) <> Substr(s_zip, 1, 5)
+ AND SUBSTRING(ca_zip, 1, 5) <> SUBSTRING(s_zip, 1, 5)
AND ss_store_sk = s_store_sk
GROUP BY i_brand,
i_brand_id,
@@ -2672,7 +2672,7 @@ JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
- AND SUBSTR("customer_address"."ca_zip", 1, 5) <> SUBSTR("store"."s_zip", 1, 5)
+ AND SUBSTRING("customer_address"."ca_zip", 1, 5) <> SUBSTRING("store"."s_zip", 1, 5)
WHERE
"date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1998
GROUP BY
@@ -2876,12 +2876,12 @@ JOIN "item" AS "item"
JOIN "warehouse" AS "warehouse"
ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk"
GROUP BY
-ROLLUP (
- "item"."i_product_name",
- "item"."i_brand",
- "item"."i_class",
- "item"."i_category"
-)
+ ROLLUP (
+ "item"."i_product_name",
+ "item"."i_brand",
+ "item"."i_class",
+ "item"."i_category"
+ )
ORDER BY
"qoh",
"i_product_name",
@@ -2895,7 +2895,7 @@ LIMIT 100;
--------------------------------------
# execute: true
WITH frequent_ss_items
- AS (SELECT Substr(i_item_desc, 1, 30) itemdesc,
+ AS (SELECT SUBSTRING(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
d_date solddate,
Count(*) cnt
@@ -2905,7 +2905,7 @@ WITH frequent_ss_items
WHERE ss_sold_date_sk = d_date_sk
AND ss_item_sk = i_item_sk
AND d_year IN ( 1998, 1998 + 1, 1998 + 2, 1998 + 3 )
- GROUP BY Substr(i_item_desc, 1, 30),
+ GROUP BY SUBSTRING(i_item_desc, 1, 30),
i_item_sk,
d_date
HAVING Count(*) > 4),
@@ -2962,7 +2962,7 @@ WITH "frequent_ss_items" AS (
JOIN "item" AS "item"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk"
GROUP BY
- SUBSTR("item"."i_item_desc", 1, 30),
+ SUBSTRING("item"."i_item_desc", 1, 30),
"item"."i_item_sk",
"date_dim"."d_date"
HAVING
@@ -3347,10 +3347,10 @@ JOIN "store" AS "store"
ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN')
AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
-ROLLUP (
- "item"."i_item_id",
- "store"."s_state"
-)
+ ROLLUP (
+ "item"."i_item_id",
+ "store"."s_state"
+ )
ORDER BY
"i_item_id",
"s_state"
@@ -4463,10 +4463,10 @@ JOIN "store" AS "store"
ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN')
AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
-ROLLUP (
- "item"."i_category",
- "item"."i_class"
-)
+ ROLLUP (
+ "item"."i_category",
+ "item"."i_class"
+ )
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "i_category" END,
@@ -5296,7 +5296,7 @@ FROM web_sales,
WHERE ws_bill_customer_sk = c_customer_sk
AND c_current_addr_sk = ca_address_sk
AND ws_item_sk = i_item_sk
- AND ( Substr(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405',
+ AND ( SUBSTRING(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405',
'86475', '85392', '85460', '80348',
'81792' )
OR i_item_id IN (SELECT i_item_id
@@ -5340,7 +5340,7 @@ JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
WHERE
NOT "_u_0"."i_item_id" IS NULL
- OR SUBSTR("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
+ OR SUBSTRING("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
GROUP BY
"customer_address"."ca_zip",
"customer_address"."ca_state"
@@ -7585,7 +7585,7 @@ LIMIT 100;
-- TPC-DS 62
--------------------------------------
# execute: true
-SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0",
+SELECT SUBSTRING(w_warehouse_name, 1, 20) AS "_col_0",
sm_type,
web_name,
Sum(CASE
@@ -7622,15 +7622,15 @@ WHERE d_month_seq BETWEEN 1222 AND 1222 + 11
AND ws_warehouse_sk = w_warehouse_sk
AND ws_ship_mode_sk = sm_ship_mode_sk
AND ws_web_site_sk = web_site_sk
-GROUP BY Substr(w_warehouse_name, 1, 20),
+GROUP BY SUBSTRING(w_warehouse_name, 1, 20),
sm_type,
web_name
-ORDER BY Substr(w_warehouse_name, 1, 20),
+ORDER BY SUBSTRING(w_warehouse_name, 1, 20),
sm_type,
web_name
LIMIT 100;
SELECT
- SUBSTR("warehouse"."w_warehouse_name", 1, 20) AS "_col_0",
+ SUBSTRING("warehouse"."w_warehouse_name", 1, 20) AS "_col_0",
"ship_mode"."sm_type" AS "sm_type",
"web_site"."web_name" AS "web_name",
SUM(
@@ -7683,7 +7683,7 @@ JOIN "warehouse" AS "warehouse"
JOIN "web_site" AS "web_site"
ON "web_sales"."ws_web_site_sk" = "web_site"."web_site_sk"
GROUP BY
- SUBSTR("warehouse"."w_warehouse_name", 1, 20),
+ SUBSTRING("warehouse"."w_warehouse_name", 1, 20),
"ship_mode"."sm_type",
"web_site"."web_name"
ORDER BY
@@ -9017,16 +9017,16 @@ WITH "dw1" AS (
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
- ROLLUP (
- "item"."i_category",
- "item"."i_class",
- "item"."i_brand",
- "item"."i_product_name",
- "date_dim"."d_year",
- "date_dim"."d_qoy",
- "date_dim"."d_moy",
- "store"."s_store_id"
- )
+ ROLLUP (
+ "item"."i_category",
+ "item"."i_class",
+ "item"."i_brand",
+ "item"."i_product_name",
+ "date_dim"."d_year",
+ "date_dim"."d_qoy",
+ "date_dim"."d_moy",
+ "store"."s_store_id"
+ )
), "dw2" AS (
SELECT
"dw1"."i_category" AS "i_category",
@@ -9396,10 +9396,10 @@ LEFT JOIN "_u_0" AS "_u_0"
WHERE
NOT "_u_0"."s_state" IS NULL
GROUP BY
-ROLLUP (
- "store"."s_state",
- "store"."s_county"
-)
+ ROLLUP (
+ "store"."s_state",
+ "store"."s_county"
+ )
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "s_state" END,
@@ -10433,10 +10433,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
-ROLLUP (
- "x"."channel",
- "x"."id"
-)
+ ROLLUP (
+ "x"."channel",
+ "x"."id"
+ )
ORDER BY
"channel",
"id"
@@ -10638,7 +10638,7 @@ LIMIT 100;
# execute: true
SELECT c_last_name,
c_first_name,
- Substr(s_city, 1, 30) AS "_col_2",
+ SUBSTRING(s_city, 1, 30) AS "_col_2",
ss_ticket_number,
amt,
profit
@@ -10667,7 +10667,7 @@ FROM (SELECT ss_ticket_number,
WHERE ss_customer_sk = c_customer_sk
ORDER BY c_last_name,
c_first_name,
- Substr(s_city, 1, 30),
+ SUBSTRING(s_city, 1, 30),
profit
LIMIT 100;
WITH "ms" AS (
@@ -10701,7 +10701,7 @@ WITH "ms" AS (
SELECT
"customer"."c_last_name" AS "c_last_name",
"customer"."c_first_name" AS "c_first_name",
- SUBSTR("ms"."s_city", 1, 30) AS "_col_2",
+ SUBSTRING("ms"."s_city", 1, 30) AS "_col_2",
"ms"."ss_ticket_number" AS "ss_ticket_number",
"ms"."amt" AS "amt",
"ms"."profit" AS "profit"
@@ -10711,7 +10711,7 @@ JOIN "customer" AS "customer"
ORDER BY
"c_last_name",
"c_first_name",
- SUBSTR("ms"."s_city", 1, 30),
+ SUBSTRING("ms"."s_city", 1, 30),
"profit"
LIMIT 100;
@@ -10937,10 +10937,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
-ROLLUP (
- "x"."channel",
- "x"."id"
-)
+ ROLLUP (
+ "x"."channel",
+ "x"."id"
+ )
ORDER BY
"channel",
"id"
@@ -11371,7 +11371,7 @@ LIMIT 100;
-- TPC-DS 85
--------------------------------------
# execute: true
-SELECT Substr(r_reason_desc, 1, 20) AS "_col_0",
+SELECT SUBSTRING(r_reason_desc, 1, 20) AS "_col_0",
Avg(ws_quantity) AS "_col_1",
Avg(wr_refunded_cash) AS "_col_2",
Avg(wr_fee) AS "_col_3"
@@ -11417,13 +11417,13 @@ WHERE ws_web_page_sk = wp_web_page_sk
AND ca_state IN ( 'FL', 'WI', 'KS' )
AND ws_net_profit BETWEEN 50 AND 250 ) )
GROUP BY r_reason_desc
-ORDER BY Substr(r_reason_desc, 1, 20),
+ORDER BY SUBSTRING(r_reason_desc, 1, 20),
Avg(ws_quantity),
Avg(wr_refunded_cash),
Avg(wr_fee)
LIMIT 100;
SELECT
- SUBSTR("reason"."r_reason_desc", 1, 20) AS "_col_0",
+ SUBSTRING("reason"."r_reason_desc", 1, 20) AS "_col_0",
AVG("web_sales"."ws_quantity") AS "_col_1",
AVG("web_returns"."wr_refunded_cash") AS "_col_2",
AVG("web_returns"."wr_fee") AS "_col_3"
@@ -11539,10 +11539,10 @@ JOIN "date_dim" AS "d1"
JOIN "item" AS "item"
ON "item"."i_item_sk" = "web_sales"."ws_item_sk"
GROUP BY
-ROLLUP (
- "item"."i_category",
- "item"."i_class"
-)
+ ROLLUP (
+ "item"."i_category",
+ "item"."i_class"
+ )
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "i_category" END,
@@ -12617,7 +12617,7 @@ ORDER BY
-- TPC-DS 99
--------------------------------------
# execute: true
-SELECT Substr(w_warehouse_name, 1, 20) AS "_col_0",
+SELECT SUBSTRING(w_warehouse_name, 1, 20) AS "_col_0",
sm_type,
cc_name,
Sum(CASE
@@ -12654,15 +12654,15 @@ WHERE d_month_seq BETWEEN 1200 AND 1200 + 11
AND cs_warehouse_sk = w_warehouse_sk
AND cs_ship_mode_sk = sm_ship_mode_sk
AND cs_call_center_sk = cc_call_center_sk
-GROUP BY Substr(w_warehouse_name, 1, 20),
+GROUP BY SUBSTRING(w_warehouse_name, 1, 20),
sm_type,
cc_name
-ORDER BY Substr(w_warehouse_name, 1, 20),
+ORDER BY SUBSTRING(w_warehouse_name, 1, 20),
sm_type,
cc_name
LIMIT 100;
SELECT
- SUBSTR("warehouse"."w_warehouse_name", 1, 20) AS "_col_0",
+ SUBSTRING("warehouse"."w_warehouse_name", 1, 20) AS "_col_0",
"ship_mode"."sm_type" AS "sm_type",
"call_center"."cc_name" AS "cc_name",
SUM(
@@ -12715,7 +12715,7 @@ JOIN "ship_mode" AS "ship_mode"
JOIN "warehouse" AS "warehouse"
ON "catalog_sales"."cs_warehouse_sk" = "warehouse"."w_warehouse_sk"
GROUP BY
- SUBSTR("warehouse"."w_warehouse_name", 1, 20),
+ SUBSTRING("warehouse"."w_warehouse_name", 1, 20),
"ship_mode"."sm_type",
"call_center"."cc_name"
ORDER BY
diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
index c131643..ed7a689 100644
--- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql
+++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql
@@ -375,7 +375,7 @@ order by
SELECT
"n1"."n_name" AS "supp_nation",
"n2"."n_name" AS "cust_nation",
- EXTRACT(year FROM CAST("lineitem"."l_shipdate" AS DATE)) AS "l_year",
+ EXTRACT(YEAR FROM CAST("lineitem"."l_shipdate" AS DATE)) AS "l_year",
SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount"
)) AS "revenue"
@@ -407,7 +407,7 @@ JOIN "nation" AS "n2"
GROUP BY
"n1"."n_name",
"n2"."n_name",
- EXTRACT(year FROM CAST("lineitem"."l_shipdate" AS DATE))
+ EXTRACT(YEAR FROM CAST("lineitem"."l_shipdate" AS DATE))
ORDER BY
"supp_nation",
"cust_nation",
@@ -425,7 +425,7 @@ select
from
(
select
- extract(year from cast(o_orderdate as date)) as o_year,
+ extract(YEAR from cast(o_orderdate as date)) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from
@@ -454,7 +454,7 @@ group by
order by
o_year;
SELECT
- EXTRACT(year FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year",
+ EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year",
SUM(
CASE
WHEN "n2"."n_name" = 'BRAZIL'
@@ -486,7 +486,7 @@ JOIN "region" AS "region"
WHERE
"part"."p_type" = 'ECONOMY ANODIZED STEEL'
GROUP BY
- EXTRACT(year FROM CAST("orders"."o_orderdate" AS DATE))
+ EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE))
ORDER BY
"o_year";
@@ -527,7 +527,7 @@ order by
o_year desc;
SELECT
"nation"."n_name" AS "nation",
- EXTRACT(year FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year",
+ EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year",
SUM(
"lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount"
@@ -549,7 +549,7 @@ WHERE
"part"."p_name" LIKE '%green%'
GROUP BY
"nation"."n_name",
- EXTRACT(year FROM CAST("orders"."o_orderdate" AS DATE))
+ EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE))
ORDER BY
"nation",
"o_year" DESC;
diff --git a/tests/fixtures/pretty.sql b/tests/fixtures/pretty.sql
index 62ba01c..3e5619a 100644
--- a/tests/fixtures/pretty.sql
+++ b/tests/fixtures/pretty.sql
@@ -99,14 +99,14 @@ WITH cte1 AS (
FOO(CASE WHEN a AND b THEN c AND d ELSE 3 END)
GROUP BY
x,
- GROUPING SETS (
- a,
- (b, c)
- ),
- CUBE (
- y,
- z
- )
+ GROUPING SETS (
+ a,
+ (b, c)
+ ),
+ CUBE (
+ y,
+ z
+ )
) AS x
)
SELECT
@@ -395,3 +395,26 @@ JOIN b
JOIN d
USING (f)
USING (g);
+
+('aaaaaaaaaaa', 'bbbbbbbbbbbbbbbb', 'ccccccccccccc', 'ddddddddddd', 'eeeeeeeeeeeeeeeeeeeee');
+(
+ 'aaaaaaaaaaa',
+ 'bbbbbbbbbbbbbbbb',
+ 'ccccccccccccc',
+ 'ddddddddddd',
+ 'eeeeeeeeeeeeeeeeeeeee'
+);
+
+/* COMMENT */
+INSERT FIRST WHEN salary > 4000 THEN INTO emp2
+ WHEN salary > 5000 THEN INTO emp3
+ WHEN salary > 6000 THEN INTO emp4
+SELECT salary FROM employees;
+/* COMMENT */
+INSERT FIRST
+ WHEN salary > 4000 THEN INTO emp2
+ WHEN salary > 5000 THEN INTO emp3
+ WHEN salary > 6000 THEN INTO emp4
+SELECT
+ salary
+FROM employees;
diff --git a/tests/test_build.py b/tests/test_build.py
index 150bb42..7518b72 100644
--- a/tests/test_build.py
+++ b/tests/test_build.py
@@ -361,10 +361,34 @@ class TestBuild(unittest.TestCase):
(
lambda: select("x")
.from_("tbl")
+ .with_("tbl", as_="SELECT x FROM tbl2", materialized=True),
+ "WITH tbl AS MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl",
+ ),
+ (
+ lambda: select("x")
+ .from_("tbl")
+ .with_("tbl", as_="SELECT x FROM tbl2", materialized=False),
+ "WITH tbl AS NOT MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl",
+ ),
+ (
+ lambda: select("x")
+ .from_("tbl")
.with_("tbl", as_="SELECT x FROM tbl2", recursive=True),
"WITH RECURSIVE tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl",
),
(
+ lambda: select("x")
+ .from_("tbl")
+ .with_("tbl", as_=select("x").from_("tbl2"), recursive=True, materialized=True),
+ "WITH RECURSIVE tbl AS MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl",
+ ),
+ (
+ lambda: select("x")
+ .from_("tbl")
+ .with_("tbl", as_=select("x").from_("tbl2"), recursive=True, materialized=False),
+ "WITH RECURSIVE tbl AS NOT MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl",
+ ),
+ (
lambda: select("x").from_("tbl").with_("tbl", as_=select("x").from_("tbl2")),
"WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl",
),
@@ -677,6 +701,18 @@ class TestBuild(unittest.TestCase):
"WITH cte AS (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte",
),
(
+ lambda: exp.insert("SELECT * FROM cte", "t").with_(
+ "cte", as_="SELECT x FROM tbl", materialized=True
+ ),
+ "WITH cte AS MATERIALIZED (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte",
+ ),
+ (
+ lambda: exp.insert("SELECT * FROM cte", "t").with_(
+ "cte", as_="SELECT x FROM tbl", materialized=False
+ ),
+ "WITH cte AS NOT MATERIALIZED (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte",
+ ),
+ (
lambda: exp.convert((exp.column("x"), exp.column("y"))).isin((1, 2), (3, 4)),
"(x, y) IN ((1, 2), (3, 4))",
"postgres",
@@ -695,6 +731,46 @@ class TestBuild(unittest.TestCase):
lambda: exp.rename_column("table1", "c1", "c2"),
"ALTER TABLE table1 RENAME COLUMN c1 TO c2",
),
+ (
+ lambda: exp.merge(
+ "WHEN MATCHED THEN UPDATE SET col1 = source.col1",
+ "WHEN NOT MATCHED THEN INSERT (col1) VALUES (source.col1)",
+ into="target_table",
+ using="source_table",
+ on="target_table.id = source_table.id",
+ ),
+ "MERGE INTO target_table USING source_table ON target_table.id = source_table.id WHEN MATCHED THEN UPDATE SET col1 = source.col1 WHEN NOT MATCHED THEN INSERT (col1) VALUES (source.col1)",
+ ),
+ (
+ lambda: exp.merge(
+ "WHEN MATCHED AND source.is_deleted = 1 THEN DELETE",
+ "WHEN MATCHED THEN UPDATE SET val = source.val",
+ "WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)",
+ into="target_table",
+ using="source_table",
+ on="target_table.id = source_table.id",
+ ),
+ "MERGE INTO target_table USING source_table ON target_table.id = source_table.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)",
+ ),
+ (
+ lambda: exp.merge(
+ "WHEN MATCHED THEN UPDATE SET target.name = source.name",
+ into=exp.table_("target_table").as_("target"),
+ using=exp.table_("source_table").as_("source"),
+ on="target.id = source.id",
+ ),
+ "MERGE INTO target_table AS target USING source_table AS source ON target.id = source.id WHEN MATCHED THEN UPDATE SET target.name = source.name",
+ ),
+ (
+ lambda: exp.merge(
+ "WHEN MATCHED THEN UPDATE SET target.name = source.name",
+ into=exp.table_("target_table").as_("target"),
+ using=exp.table_("source_table").as_("source"),
+ on="target.id = source.id",
+ returning="target.*",
+ ),
+ "MERGE INTO target_table AS target USING source_table AS source ON target.id = source.id WHEN MATCHED THEN UPDATE SET target.name = source.name RETURNING target.*",
+ ),
]:
with self.subTest(sql):
self.assertEqual(expression().sql(dialect[0] if dialect else None), sql)
diff --git a/tests/test_diff.py b/tests/test_diff.py
index fa012a8..f83c805 100644
--- a/tests/test_diff.py
+++ b/tests/test_diff.py
@@ -1,14 +1,18 @@
import unittest
from sqlglot import exp, parse_one
-from sqlglot.diff import Insert, Keep, Move, Remove, Update, diff
+from sqlglot.diff import Insert, Move, Remove, Update, diff
from sqlglot.expressions import Join, to_table
+def diff_delta_only(source, target, matchings=None, **kwargs):
+ return diff(source, target, matchings=matchings, delta_only=True, **kwargs)
+
+
class TestDiff(unittest.TestCase):
def test_simple(self):
self._validate_delta_only(
- diff(parse_one("SELECT a + b"), parse_one("SELECT a - b")),
+ diff_delta_only(parse_one("SELECT a + b"), parse_one("SELECT a - b")),
[
Remove(parse_one("a + b")), # the Add node
Insert(parse_one("a - b")), # the Sub node
@@ -16,21 +20,21 @@ class TestDiff(unittest.TestCase):
)
self._validate_delta_only(
- diff(parse_one("SELECT a, b, c"), parse_one("SELECT a, c")),
+ diff_delta_only(parse_one("SELECT a, b, c"), parse_one("SELECT a, c")),
[
Remove(parse_one("b")), # the Column node
],
)
self._validate_delta_only(
- diff(parse_one("SELECT a, b"), parse_one("SELECT a, b, c")),
+ diff_delta_only(parse_one("SELECT a, b"), parse_one("SELECT a, b, c")),
[
Insert(parse_one("c")), # the Column node
],
)
self._validate_delta_only(
- diff(
+ diff_delta_only(
parse_one("SELECT a FROM table_one"),
parse_one("SELECT a FROM table_two"),
),
@@ -44,7 +48,9 @@ class TestDiff(unittest.TestCase):
def test_lambda(self):
self._validate_delta_only(
- diff(parse_one("SELECT a, b, c, x(a -> a)"), parse_one("SELECT a, b, c, x(b -> b)")),
+ diff_delta_only(
+ parse_one("SELECT a, b, c, x(a -> a)"), parse_one("SELECT a, b, c, x(b -> b)")
+ ),
[
Update(
exp.Lambda(this=exp.to_identifier("a"), expressions=[exp.to_identifier("a")]),
@@ -55,14 +61,16 @@ class TestDiff(unittest.TestCase):
def test_udf(self):
self._validate_delta_only(
- diff(parse_one('SELECT a, b, "my.udf1"()'), parse_one('SELECT a, b, "my.udf2"()')),
+ diff_delta_only(
+ parse_one('SELECT a, b, "my.udf1"()'), parse_one('SELECT a, b, "my.udf2"()')
+ ),
[
Insert(parse_one('"my.udf2"()')),
Remove(parse_one('"my.udf1"()')),
],
)
self._validate_delta_only(
- diff(
+ diff_delta_only(
parse_one('SELECT a, b, "my.udf"(x, y, z)'),
parse_one('SELECT a, b, "my.udf"(x, y, w)'),
),
@@ -74,28 +82,28 @@ class TestDiff(unittest.TestCase):
def test_node_position_changed(self):
self._validate_delta_only(
- diff(parse_one("SELECT a, b, c"), parse_one("SELECT c, a, b")),
+ diff_delta_only(parse_one("SELECT a, b, c"), parse_one("SELECT c, a, b")),
[
Move(parse_one("c")), # the Column node
],
)
self._validate_delta_only(
- diff(parse_one("SELECT a + b"), parse_one("SELECT b + a")),
+ diff_delta_only(parse_one("SELECT a + b"), parse_one("SELECT b + a")),
[
Move(parse_one("a")), # the Column node
],
)
self._validate_delta_only(
- diff(parse_one("SELECT aaaa AND bbbb"), parse_one("SELECT bbbb AND aaaa")),
+ diff_delta_only(parse_one("SELECT aaaa AND bbbb"), parse_one("SELECT bbbb AND aaaa")),
[
Move(parse_one("aaaa")), # the Column node
],
)
self._validate_delta_only(
- diff(
+ diff_delta_only(
parse_one("SELECT aaaa OR bbbb OR cccc"),
parse_one("SELECT cccc OR bbbb OR aaaa"),
),
@@ -120,7 +128,7 @@ class TestDiff(unittest.TestCase):
"""
self._validate_delta_only(
- diff(parse_one(expr_src), parse_one(expr_tgt)),
+ diff_delta_only(parse_one(expr_src), parse_one(expr_tgt)),
[
Remove(parse_one("LOWER(c) AS c")), # the Alias node
Remove(parse_one("LOWER(c)")), # the Lower node
@@ -133,8 +141,7 @@ class TestDiff(unittest.TestCase):
expr_src = "SELECT a, b FROM t1 LEFT JOIN t2 ON t1.key = t2.key"
expr_tgt = "SELECT a, b FROM t1 RIGHT JOIN t2 ON t1.key = t2.key"
- changes = diff(parse_one(expr_src), parse_one(expr_tgt))
- changes = _delta_only(changes)
+ changes = diff_delta_only(parse_one(expr_src), parse_one(expr_tgt))
self.assertEqual(len(changes), 2)
self.assertTrue(isinstance(changes[0], Remove))
@@ -145,10 +152,10 @@ class TestDiff(unittest.TestCase):
expr_src = parse_one("SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b)")
expr_tgt = parse_one("SELECT RANK() OVER (PARTITION BY a ORDER BY b)")
- self._validate_delta_only(diff(expr_src, expr_src), [])
+ self._validate_delta_only(diff_delta_only(expr_src, expr_src), [])
self._validate_delta_only(
- diff(expr_src, expr_tgt),
+ diff_delta_only(expr_src, expr_tgt),
[
Remove(parse_one("ROW_NUMBER()")), # the Anonymous node
Insert(parse_one("RANK()")), # the Anonymous node
@@ -160,7 +167,7 @@ class TestDiff(unittest.TestCase):
expr_tgt = parse_one("SELECT 1, 2, 3, 4")
self._validate_delta_only(
- diff(expr_src, expr_tgt),
+ diff_delta_only(expr_src, expr_tgt),
[
Remove(expr_src),
Insert(expr_tgt),
@@ -171,7 +178,7 @@ class TestDiff(unittest.TestCase):
)
self._validate_delta_only(
- diff(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt)]),
+ diff_delta_only(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt)]),
[
Insert(exp.Literal.number(2)),
Insert(exp.Literal.number(3)),
@@ -180,23 +187,20 @@ class TestDiff(unittest.TestCase):
)
with self.assertRaises(ValueError):
- diff(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt), (expr_src, expr_tgt)])
+ diff_delta_only(
+ expr_src, expr_tgt, matchings=[(expr_src, expr_tgt), (expr_src, expr_tgt)]
+ )
def test_identifier(self):
expr_src = parse_one("SELECT a FROM tbl")
expr_tgt = parse_one("SELECT a, tbl.b from tbl")
self._validate_delta_only(
- diff(expr_src, expr_tgt),
+ diff_delta_only(expr_src, expr_tgt),
[
Insert(expression=exp.to_column("tbl.b")),
],
)
- def _validate_delta_only(self, actual_diff, expected_delta):
- actual_delta = _delta_only(actual_diff)
+ def _validate_delta_only(self, actual_delta, expected_delta):
self.assertEqual(set(actual_delta), set(expected_delta))
-
-
-def _delta_only(changes):
- return [d for d in changes if not isinstance(d, Keep)]
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 317b930..e80fb1e 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -14,6 +14,8 @@ from sqlglot.errors import ExecuteError
from sqlglot.executor import execute
from sqlglot.executor.python import Python
from sqlglot.executor.table import Table, ensure_tables
+from sqlglot.optimizer import optimize
+from sqlglot.planner import Plan
from tests.helpers import (
FIXTURES_DIR,
SKIP_INTEGRATION,
@@ -862,3 +864,18 @@ class TestExecutor(unittest.TestCase):
result = execute("SELECT x FROM t", dialect="duckdb", tables=tables)
self.assertEqual(result.columns, ("x",))
self.assertEqual(result.rows, [([1, 2, 3],)])
+
+ def test_agg_order(self):
+ plan = Plan(
+ optimize("""
+ SELECT
+ AVG(bill_length_mm) AS avg_bill_length,
+ AVG(bill_depth_mm) AS avg_bill_depth
+ FROM penguins
+ """)
+ )
+
+ assert [agg.alias for agg in plan.root.aggregations] == [
+ "avg_bill_length",
+ "avg_bill_depth",
+ ]
diff --git a/tests/test_expressions.py b/tests/test_expressions.py
index 1395b24..e88740b 100644
--- a/tests/test_expressions.py
+++ b/tests/test_expressions.py
@@ -1,3 +1,4 @@
+import sys
import datetime
import math
import unittest
@@ -431,6 +432,31 @@ class TestExpressions(unittest.TestCase):
table = expression.find(exp.Table)
self.assertEqual(table.alias_column_names, ["a", "b"])
+ def test_cast(self):
+ expression = parse_one("select cast(x as DATE)")
+ casts = list(expression.find_all(exp.Cast))
+ self.assertEqual(len(casts), 1)
+
+ cast = casts[0]
+ self.assertTrue(cast.to.is_type(exp.DataType.Type.DATE))
+
+ # check that already-cast values aren't re-cast if wrapped in a cast to the same type
+ recast = exp.cast(cast, to=exp.DataType.Type.DATE)
+ self.assertEqual(recast, cast)
+ self.assertEqual(recast.sql(), "CAST(x AS DATE)")
+
+ # however, recasting is fine if the types are different
+ recast = exp.cast(cast, to=exp.DataType.Type.VARCHAR)
+ self.assertNotEqual(recast, cast)
+ self.assertEqual(len(list(recast.find_all(exp.Cast))), 2)
+ self.assertEqual(recast.sql(), "CAST(CAST(x AS DATE) AS VARCHAR)")
+
+ # check that the dialect is respected when casting strings
+ self.assertEqual(
+ exp.cast("x", to="regtype", dialect="postgres").sql(), "CAST(x AS REGTYPE)"
+ )
+ self.assertEqual(exp.cast("`x`", to="date", dialect="hive").sql(), 'CAST("x" AS DATE)')
+
def test_ctes(self):
expression = parse_one("SELECT a FROM x")
self.assertEqual(expression.ctes, [])
@@ -648,6 +674,8 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("STR_POSITION(a, 'test')"), exp.StrPosition)
self.assertIsInstance(parse_one("STR_TO_UNIX(a, 'format')"), exp.StrToUnix)
self.assertIsInstance(parse_one("STRUCT_EXTRACT(a, 'test')"), exp.StructExtract)
+ self.assertIsInstance(parse_one("SUBSTR('a', 1, 1)"), exp.Substring)
+ self.assertIsInstance(parse_one("SUBSTRING('a', 1, 1)"), exp.Substring)
self.assertIsInstance(parse_one("SUM(a)"), exp.Sum)
self.assertIsInstance(parse_one("SQRT(a)"), exp.Sqrt)
self.assertIsInstance(parse_one("STDDEV(a)"), exp.Stddev)
@@ -657,7 +685,10 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("TIME_TO_TIME_STR(a)"), exp.Cast)
self.assertIsInstance(parse_one("TIME_TO_UNIX(a)"), exp.TimeToUnix)
self.assertIsInstance(parse_one("TIME_STR_TO_DATE(a)"), exp.TimeStrToDate)
- self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime)
+ self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime)
+ self.assertIsInstance(
+ parse_one("TIME_STR_TO_TIME(a, 'America/Los_Angeles')"), exp.TimeStrToTime
+ )
self.assertIsInstance(parse_one("TIME_STR_TO_UNIX(a)"), exp.TimeStrToUnix)
self.assertIsInstance(parse_one("TRIM(LEADING 'b' FROM 'bla')"), exp.Trim)
self.assertIsInstance(parse_one("TS_OR_DS_ADD(a, 1, 'day')"), exp.TsOrDsAdd)
@@ -791,6 +822,7 @@ class TestExpressions(unittest.TestCase):
def test_convert(self):
from collections import namedtuple
+ import pytz
PointTuple = namedtuple("Point", ["x", "y"])
@@ -809,11 +841,17 @@ class TestExpressions(unittest.TestCase):
({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, 1),
- "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001+00:00')",
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001')",
),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc),
- "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00')",
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00', 'UTC')",
+ ),
+ (
+ pytz.timezone("America/Los_Angeles").localize(
+ datetime.datetime(2022, 10, 1, 1, 1, 1)
+ ),
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')",
),
(datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"),
(math.nan, "NULL"),
@@ -829,6 +867,21 @@ class TestExpressions(unittest.TestCase):
"MAP_FROM_ARRAYS(ARRAY('test'), ARRAY('value'))",
)
+ @unittest.skipUnless(sys.version_info >= (3, 9), "zoneinfo only available from python 3.9+")
+ def test_convert_python39(self):
+ import zoneinfo
+
+ for value, expected in [
+ (
+ datetime.datetime(
+ 2022, 10, 1, 1, 1, 1, tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")
+ ),
+ "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')",
+ )
+ ]:
+ with self.subTest(value):
+ self.assertEqual(exp.convert(value).sql(), expected)
+
def test_comment_alias(self):
sql = """
SELECT
@@ -984,7 +1037,6 @@ FROM foo""",
self.assertEqual(exp.DataType.build("GEOGRAPHY").sql(), "GEOGRAPHY")
self.assertEqual(exp.DataType.build("GEOMETRY").sql(), "GEOMETRY")
self.assertEqual(exp.DataType.build("STRUCT").sql(), "STRUCT")
- self.assertEqual(exp.DataType.build("NULLABLE").sql(), "NULLABLE")
self.assertEqual(exp.DataType.build("HLLSKETCH", dialect="redshift").sql(), "HLLSKETCH")
self.assertEqual(exp.DataType.build("HSTORE", dialect="postgres").sql(), "HSTORE")
self.assertEqual(exp.DataType.build("NULL").sql(), "NULL")
@@ -993,14 +1045,15 @@ FROM foo""",
self.assertEqual(exp.DataType.build("UNKNOWN", dialect="bigquery").sql(), "UNKNOWN")
self.assertEqual(exp.DataType.build("UNKNOWN", dialect="snowflake").sql(), "UNKNOWN")
self.assertEqual(exp.DataType.build("TIMESTAMP", dialect="bigquery").sql(), "TIMESTAMPTZ")
- self.assertEqual(
- exp.DataType.build("struct<x int>", dialect="spark").sql(), "STRUCT<x INT>"
- )
self.assertEqual(exp.DataType.build("USER-DEFINED").sql(), "USER-DEFINED")
-
self.assertEqual(exp.DataType.build("ARRAY<UNKNOWN>").sql(), "ARRAY<UNKNOWN>")
self.assertEqual(exp.DataType.build("ARRAY<NULL>").sql(), "ARRAY<NULL>")
self.assertEqual(exp.DataType.build("varchar(100) collate 'en-ci'").sql(), "VARCHAR(100)")
+ self.assertEqual(exp.DataType.build("int[3]").sql(dialect="duckdb"), "INT[3]")
+ self.assertEqual(exp.DataType.build("int[3][3]").sql(dialect="duckdb"), "INT[3][3]")
+ self.assertEqual(
+ exp.DataType.build("struct<x int>", dialect="spark").sql(), "STRUCT<x INT>"
+ )
with self.assertRaises(ParseError):
exp.DataType.build("varchar(")
@@ -1011,12 +1064,18 @@ FROM foo""",
"ALTER TABLE t1 RENAME TO t2",
)
- def test_is_negative(self):
- self.assertTrue(parse_one("-1").is_negative)
- self.assertTrue(parse_one("- 1.0").is_negative)
- self.assertTrue(exp.Literal.number("-1").is_negative)
- self.assertFalse(parse_one("1").is_negative)
- self.assertFalse(parse_one("x").is_negative)
+ def test_to_py(self):
+ self.assertEqual(parse_one("- -1").to_py(), 1)
+ self.assertIs(parse_one("TRUE").to_py(), True)
+ self.assertIs(parse_one("1").to_py(), 1)
+ self.assertIs(parse_one("'1'").to_py(), "1")
+ self.assertIs(parse_one("null").to_py(), None)
+
+ with self.assertRaises(ValueError):
+ parse_one("x").to_py()
+
+ def test_is_int(self):
+ self.assertTrue(parse_one("- -1").is_int)
def test_is_star(self):
assert parse_one("*").is_star
@@ -1099,6 +1158,10 @@ FROM foo""",
dtype = exp.DataType.build("a.b.c", udt=True)
assert dtype.is_type("a.b.c")
+ dtype = exp.DataType.build("Nullable(Int32)", dialect="clickhouse")
+ assert dtype.is_type("int")
+ assert not dtype.is_type("int", check_nullable=True)
+
with self.assertRaises(ParseError):
exp.DataType.build("foo")
@@ -1114,3 +1177,6 @@ FROM foo""",
AssertionError, "x is not <class 'sqlglot.expressions.Identifier'>\\."
):
parse_one("x").assert_is(exp.Identifier)
+
+ def test_parse_identifier(self):
+ self.assertEqual(exp.parse_identifier("a ' b"), exp.to_identifier("a ' b"))
diff --git a/tests/test_generator.py b/tests/test_generator.py
index a5945b2..7609831 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -43,3 +43,7 @@ class TestGenerator(unittest.TestCase):
assert parse_one("X").sql(identify="safe") == "X"
assert parse_one("x as 1").sql(identify="safe") == '"x" AS "1"'
assert parse_one("X as 1").sql(identify="safe") == 'X AS "1"'
+
+ def test_generate_nested_binary(self):
+ sql = "SELECT 'foo'" + (" || 'foo'" * 1000)
+ self.assertEqual(parse_one(sql).sql(copy=False), sql)
diff --git a/tests/test_jsonpath.py b/tests/test_jsonpath.py
index 4daf3c1..c939c52 100644
--- a/tests/test_jsonpath.py
+++ b/tests/test_jsonpath.py
@@ -2,8 +2,9 @@ import json
import os
import unittest
-from sqlglot import exp, jsonpath
+from sqlglot import exp
from sqlglot.errors import ParseError, TokenError
+from sqlglot.jsonpath import parse
from tests.helpers import FIXTURES_DIR
@@ -25,7 +26,7 @@ class TestJsonpath(unittest.TestCase):
exp.JSONPathSelector(this=exp.JSONPathScript(this="@.x)")),
]
self.assertEqual(
- jsonpath.parse("$.*.a[0]['x'][*, 'y', 1].z[?(@.a == 'b'), 1:][1:5][1,?@.a][(@.x)]"),
+ parse("$.*.a[0]['x'][*, 'y', 1].z[?(@.a == 'b'), 1:][1:5][1,?@.a][(@.x)]"),
exp.JSONPath(expressions=expected_expressions),
)
@@ -36,7 +37,7 @@ class TestJsonpath(unittest.TestCase):
("$[((@.length-1))]", "$[((@.length-1))]"),
):
with self.subTest(f"{selector} -> {expected}"):
- self.assertEqual(jsonpath.parse(selector).sql(), f"'{expected}'")
+ self.assertEqual(parse(selector).sql(), f"'{expected}'")
def test_cts_file(self):
with open(os.path.join(FIXTURES_DIR, "jsonpath", "cts.json")) as file:
@@ -131,9 +132,9 @@ class TestJsonpath(unittest.TestCase):
with self.subTest(f"{selector.strip()} /* {test['name']} */"):
if test.get("invalid_selector"):
try:
- jsonpath.parse(selector)
+ parse(selector)
except (ParseError, TokenError):
pass
else:
- path = jsonpath.parse(selector)
+ path = parse(selector)
self.assertEqual(path.sql(), f"'{overrides.get(selector, selector)}'")
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 81b9731..857ba1a 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -27,11 +27,11 @@ def parse_and_optimize(func, sql, read_dialect, **kwargs):
return func(parse_one(sql, read=read_dialect), **kwargs)
-def qualify_columns(expression, **kwargs):
+def qualify_columns(expression, validate_qualify_columns=True, **kwargs):
expression = optimizer.qualify.qualify(
expression,
infer_schema=True,
- validate_qualify_columns=False,
+ validate_qualify_columns=validate_qualify_columns,
identify=False,
**kwargs,
)
@@ -135,11 +135,17 @@ class TestOptimizer(unittest.TestCase):
continue
dialect = meta.get("dialect")
leave_tables_isolated = meta.get("leave_tables_isolated")
+ validate_qualify_columns = meta.get("validate_qualify_columns")
func_kwargs = {**kwargs}
if leave_tables_isolated is not None:
func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated)
+ if validate_qualify_columns is not None:
+ func_kwargs["validate_qualify_columns"] = string_to_bool(
+ validate_qualify_columns
+ )
+
if set_dialect and dialect:
func_kwargs["dialect"] = dialect
@@ -341,6 +347,88 @@ class TestOptimizer(unittest.TestCase):
"WITH tbl1 AS (SELECT STRUCT(1 AS `f0`, 2 AS f1) AS col) SELECT tbl1.col.`f0` AS `f0`, tbl1.col.f1 AS f1 FROM tbl1",
)
+ # can't coalesce USING columns because they don't exist in every already-joined table
+ self.assertEqual(
+ optimizer.qualify_columns.qualify_columns(
+ parse_one(
+ "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS _q_0",
+ dialect="bigquery",
+ ),
+ schema=MappingSchema(
+ schema={
+ "t1": {"id": "int64", "dt": "date", "common": "int64"},
+ "lkp": {"id": "int64", "other_id": "int64", "common": "int64"},
+ "t2": {"other_id": "int64", "dt": "date", "v": "int64", "common": "int64"},
+ },
+ dialect="bigquery",
+ ),
+ ).sql(dialect="bigquery"),
+ "SELECT _q_0.id AS id, _q_0.dt AS dt, _q_0.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS _q_0",
+ )
+
+ # Detection of correlation where columns are referenced in derived tables nested within subqueries
+ self.assertEqual(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT a.g FROM a WHERE a.e < (SELECT MAX(u) FROM (SELECT SUM(c.b) AS u FROM c WHERE c.d = f GROUP BY c.e) w)"
+ ),
+ schema={
+ "a": {"g": "INT", "e": "INT", "f": "INT"},
+ "c": {"d": "INT", "e": "INT", "b": "INT"},
+ },
+ quote_identifiers=False,
+ ).sql(),
+ "SELECT a.g AS g FROM a AS a WHERE a.e < (SELECT MAX(w.u) AS _col_0 FROM (SELECT SUM(c.b) AS u FROM c AS c WHERE c.d = a.f GROUP BY c.e) AS w)",
+ )
+
+ # Detection of correlation where columns are referenced in derived tables nested within lateral joins
+ self.assertEqual(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT u.user_id, l.log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date FROM (SELECT l.log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l",
+ dialect="postgres",
+ ),
+ schema={
+ "users": {"user_id": "text", "log_date": "date"},
+ "logs": {"user_id": "text", "log_date": "date"},
+ },
+ quote_identifiers=False,
+ ).sql("postgres"),
+ "SELECT u.user_id AS user_id, l.log_date AS log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date AS log_date FROM (SELECT l.log_date AS log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l",
+ )
+
+ self.assertEqual(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT A.b_id FROM A JOIN B ON A.b_id=B.b_id JOIN C USING(c_id)",
+ dialect="postgres",
+ ),
+ schema={
+ "A": {"b_id": "int"},
+ "B": {"b_id": "int", "c_id": "int"},
+ "C": {"c_id": "int"},
+ },
+ quote_identifiers=False,
+ ).sql("postgres"),
+ "SELECT a.b_id AS b_id FROM a AS a JOIN b AS b ON a.b_id = b.b_id JOIN c AS c ON b.c_id = c.c_id",
+ )
+ self.assertEqual(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT A.b_id FROM A JOIN B ON A.b_id=B.b_id JOIN C ON B.b_id = C.b_id JOIN D USING(d_id)",
+ dialect="postgres",
+ ),
+ schema={
+ "A": {"b_id": "int"},
+ "B": {"b_id": "int", "d_id": "int"},
+ "C": {"b_id": "int"},
+ "D": {"d_id": "int"},
+ },
+ quote_identifiers=False,
+ ).sql("postgres"),
+ "SELECT a.b_id AS b_id FROM a AS a JOIN b AS b ON a.b_id = b.b_id JOIN c AS c ON b.b_id = c.b_id JOIN d AS d ON b.d_id = d.d_id",
+ )
+
self.check_file(
"qualify_columns",
qualify_columns,
@@ -473,15 +561,35 @@ SELECT :with,WITH :expressions,CTE :this,UNION :this,SELECT :expressions,1,:expr
'SELECT "x"."a" + 1 AS "d", "x"."a" + 1 + 1 AS "e" FROM "x" AS "x" WHERE ("x"."a" + 2) > 1 GROUP BY "x"."a" + 1 + 1',
)
+ unused_schema = {"l": {"c": "int"}}
self.assertEqual(
optimizer.qualify_columns.qualify_columns(
parse_one("SELECT CAST(x AS INT) AS y FROM z AS z"),
- schema={"l": {"c": "int"}},
+ schema=unused_schema,
infer_schema=False,
).sql(),
"SELECT CAST(x AS INT) AS y FROM z AS z",
)
+ # BigQuery expands overlapping aliases only in GROUP BY and HAVING
+ sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT id AS my_id, CONCAT(id, name) AS full_name FROM data WHERE my_id = 1 GROUP BY my_id, full_name HAVING my_id = 1"
+ self.assertEqual(
+ optimizer.qualify_columns.qualify_columns(
+ parse_one(sql, dialect="bigquery"),
+ schema=MappingSchema(schema=unused_schema, dialect="bigquery"),
+ ).sql(),
+ "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.my_id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1",
+ )
+
+ # ClickHouse expands overlapping aliases across the entire query
+ self.assertEqual(
+ optimizer.qualify_columns.qualify_columns(
+ parse_one(sql, dialect="clickhouse"),
+ schema=MappingSchema(schema=unused_schema, dialect="clickhouse"),
+ ).sql(),
+ "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1",
+ )
+
def test_optimize_joins(self):
self.check_file(
"optimize_joins",
@@ -552,7 +660,7 @@ SELECT
"_q_0"."n_comment" AS "n_comment"
FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') AS "_q_0"
""".strip(),
- optimizer.optimize(expression).sql(pretty=True),
+ optimizer.optimize(expression, infer_csv_schemas=True).sql(pretty=True),
)
def test_scope(self):
@@ -989,31 +1097,14 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
concat_expr.right.expressions[0].type.this, exp.DataType.Type.VARCHAR
) # x.cola (arg)
+ # Ensures we don't raise if there are unqualified columns
annotate_types(parse_one("select x from y lateral view explode(y) as x")).expressions[0]
- def test_null_annotation(self):
- expression = annotate_types(parse_one("SELECT NULL + 2 AS col")).expressions[0].this
- self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL)
- self.assertEqual(expression.right.type.this, exp.DataType.Type.INT)
-
- # NULL <op> UNKNOWN should yield NULL
- sql = "SELECT NULL + SOME_ANONYMOUS_FUNC() AS result"
-
- concat_expr_alias = annotate_types(parse_one(sql)).expressions[0]
- self.assertEqual(concat_expr_alias.type.this, exp.DataType.Type.NULL)
-
- concat_expr = concat_expr_alias.this
- self.assertEqual(concat_expr.type.this, exp.DataType.Type.NULL)
- self.assertEqual(concat_expr.left.type.this, exp.DataType.Type.NULL)
- self.assertEqual(concat_expr.right.type.this, exp.DataType.Type.UNKNOWN)
-
- def test_nullable_annotation(self):
- nullable = exp.DataType.build("NULLABLE", expressions=exp.DataType.build("BOOLEAN"))
- expression = annotate_types(parse_one("NULL AND FALSE"))
-
- self.assertEqual(expression.type, nullable)
- self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL)
- self.assertEqual(expression.right.type.this, exp.DataType.Type.BOOLEAN)
+ # NULL <op> UNKNOWN should yield UNKNOWN
+ self.assertEqual(
+ annotate_types(parse_one("SELECT NULL + ANONYMOUS_FUNC()")).expressions[0].type.this,
+ exp.DataType.Type.UNKNOWN,
+ )
def test_predicate_annotation(self):
expression = annotate_types(parse_one("x BETWEEN a AND b"))
@@ -1142,6 +1233,19 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
exp.DataType.build("date"),
)
+ self.assertEqual(
+ annotate_types(
+ optimizer.qualify.qualify(
+ parse_one(
+ "SELECT x FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-06 02:00:00', interval 1 day)) AS x"
+ )
+ )
+ )
+ .selects[0]
+ .type,
+ exp.DataType.build("timestamp"),
+ )
+
def test_map_annotation(self):
# ToMap annotation
expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb"))
@@ -1157,6 +1261,26 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark"))
self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)"))
+ def test_union_annotation(self):
+ for left, right, expected_type in (
+ ("SELECT 1::INT AS c", "SELECT 2::BIGINT AS c", "BIGINT"),
+ ("SELECT 1 AS c", "SELECT NULL AS c", "INT"),
+ ("SELECT FOO() AS c", "SELECT 1 AS c", "UNKNOWN"),
+ ("SELECT FOO() AS c", "SELECT BAR() AS c", "UNKNOWN"),
+ ):
+ with self.subTest(f"left: {left}, right: {right}, expected: {expected_type}"):
+ lr = annotate_types(parse_one(f"SELECT t.c FROM ({left} UNION ALL {right}) t(c)"))
+ rl = annotate_types(parse_one(f"SELECT t.c FROM ({right} UNION ALL {left}) t(c)"))
+ assert lr.selects[0].type == rl.selects[0].type == exp.DataType.build(expected_type)
+
+ union_by_name = annotate_types(
+ parse_one(
+ "SELECT t.a, t.d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7.0 d, 8::BIGINT a) AS t(a, d)"
+ )
+ )
+ self.assertEqual(union_by_name.selects[0].type.this, exp.DataType.Type.BIGINT)
+ self.assertEqual(union_by_name.selects[1].type.this, exp.DataType.Type.DOUBLE)
+
def test_recursive_cte(self):
query = parse_one(
"""
@@ -1253,3 +1377,26 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
self.assertEqual(4, normalization_distance(gen_expr(2), max_=100))
self.assertEqual(18, normalization_distance(gen_expr(3), max_=100))
self.assertEqual(110, normalization_distance(gen_expr(10), max_=100))
+
+ def test_custom_annotators(self):
+ # In the Spark dialect hierarchy, the SUBSTRING result type depends on the input expression type
+ for dialect in ("spark2", "spark", "databricks"):
+ for expr_type_pair in (
+ ("col", "STRING"),
+ ("col", "BINARY"),
+ ("'str_literal'", "STRING"),
+ ("CAST('str_literal' AS BINARY)", "BINARY"),
+ ):
+ with self.subTest(
+ f"Testing {dialect}'s SUBSTRING() result type for {expr_type_pair}"
+ ):
+ expr, type = expr_type_pair
+ ast = parse_one(f"SELECT substring({expr}, 2, 3) AS x FROM tbl", read=dialect)
+
+ subst_type = (
+ optimizer.optimize(ast, schema={"tbl": {"col": type}}, dialect=dialect)
+ .expressions[0]
+ .type
+ )
+
+ self.assertEqual(subst_type.sql(dialect), exp.DataType.build(type).sql(dialect))
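
The new test_union_annotation above checks that set operations are annotated with the wider of the two branch types. A minimal sketch of the same behaviour outside the test suite (the SQL string mirrors the first test case; not part of this diff):

from sqlglot import exp, parse_one
from sqlglot.optimizer.annotate_types import annotate_types

# INT UNION ALL BIGINT: the outer projection picks up the wider BIGINT type.
ast = annotate_types(
    parse_one("SELECT t.c FROM (SELECT 1::INT AS c UNION ALL SELECT 2::BIGINT AS c) t(c)")
)
assert ast.selects[0].type.this == exp.DataType.Type.BIGINT
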
diff --git a/tests/test_parser.py b/tests/test_parser.py
index d6849c3..9ff8373 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -14,6 +14,8 @@ class TestParser(unittest.TestCase):
parse_one("")
def test_parse_into(self):
+ self.assertIsInstance(parse_one("select * from t", into=exp.Select), exp.Select)
+ self.assertIsInstance(parse_one("select * from t limit 5", into=exp.Select), exp.Select)
self.assertIsInstance(parse_one("left join foo", into=exp.Join), exp.Join)
self.assertIsInstance(parse_one("int", into=exp.DataType), exp.DataType)
self.assertIsInstance(parse_one("array<int>", into=exp.DataType), exp.DataType)
@@ -102,6 +104,13 @@ class TestParser(unittest.TestCase):
def test_float(self):
self.assertEqual(parse_one(".2"), parse_one("0.2"))
+ def test_unnest(self):
+ unnest_sql = "UNNEST(foo)"
+ expr = parse_one(unnest_sql)
+ self.assertIsInstance(expr, exp.Unnest)
+ self.assertIsInstance(expr.expressions, list)
+ self.assertEqual(expr.sql(), unnest_sql)
+
def test_unnest_projection(self):
expr = parse_one("SELECT foo IN UNNEST(bla) AS bar")
self.assertIsInstance(expr.selects[0], exp.Alias)
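
Sketch of the round-trip checked by the new test_unnest: a bare UNNEST call parses into exp.Unnest and regenerates the same SQL (not part of the diff):

from sqlglot import exp, parse_one

expr = parse_one("UNNEST(foo)")
assert isinstance(expr, exp.Unnest)        # top-level node is an Unnest
assert isinstance(expr.expressions, list)  # its arguments are held in a list
assert expr.sql() == "UNNEST(foo)"         # and it round-trips unchanged
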
@@ -570,12 +579,6 @@ class TestParser(unittest.TestCase):
logger,
)
- def test_rename_table(self):
- self.assertEqual(
- parse_one("ALTER TABLE foo RENAME TO bar").sql(),
- "ALTER TABLE foo RENAME TO bar",
- )
-
def test_pivot_columns(self):
nothing_aliased = """
SELECT * FROM (
@@ -696,77 +699,19 @@ class TestParser(unittest.TestCase):
def test_parse_nested(self):
now = time.time()
- query = parse_one(
- """
- SELECT *
- FROM a
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- LEFT JOIN b ON a.id = b.id
- """
- )
-
+ query = parse_one("SELECT * FROM a " + ("LEFT JOIN b ON a.id = b.id " * 38))
self.assertIsNotNone(query)
+ self.assertLessEqual(time.time() - now, 0.1)
- query = parse_one(
- """
- SELECT *
- FROM a
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- LEFT JOIN UNNEST(ARRAY[])
- """
- )
+ now = time.time()
+ query = parse_one("SELECT * FROM a " + ("LEFT JOIN UNNEST(ARRAY[]) " * 15))
+ self.assertIsNotNone(query)
+ self.assertLessEqual(time.time() - now, 0.1)
+ now = time.time()
+ query = parse_one("SELECT * FROM a " + ("OUTER APPLY (SELECT * FROM b) " * 30))
self.assertIsNotNone(query)
- self.assertLessEqual(time.time() - now, 0.2)
+ self.assertLessEqual(time.time() - now, 0.1)
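
The rewritten performance checks above build the deeply nested SQL programmatically instead of spelling out dozens of identical JOIN clauses; a sketch of the same pattern with the tightened 0.1s budget (not part of the diff):

import time
from sqlglot import parse_one

sql = "SELECT * FROM a " + ("LEFT JOIN b ON a.id = b.id " * 38)
start = time.time()
parse_one(sql)                        # deeply nested joins should parse quickly
assert time.time() - start <= 0.1     # same budget as the updated test
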
def test_parse_properties(self):
self.assertEqual(
@@ -894,3 +839,29 @@ class TestParser(unittest.TestCase):
def test_parse_prop_eq(self):
self.assertIsInstance(parse_one("x(a := b and c)").expressions[0], exp.PropertyEQ)
+
+ def test_collate(self):
+ collates = [
+ ('pg_catalog."default"', exp.Column),
+ ('"en_DE"', exp.Identifier),
+ ("LATIN1_GENERAL_BIN", exp.Var),
+ ("'en'", exp.Literal),
+ ]
+
+ for collate_pair in collates:
+ collate_node = parse_one(
+ f"""SELECT * FROM t WHERE foo LIKE '%bar%' COLLATE {collate_pair[0]}"""
+ ).find(exp.Collate)
+ self.assertIsInstance(collate_node, exp.Collate)
+ self.assertIsInstance(collate_node.expression, collate_pair[1])
+
+ def test_odbc_date_literals(self):
+ for value, cls in [
+ ("{d'2024-01-01'}", exp.Date),
+ ("{t'12:00:00'}", exp.Time),
+ ("{ts'2024-01-01 12:00:00'}", exp.Timestamp),
+ ]:
+ sql = f"INSERT INTO tab(ds) VALUES ({value})"
+ expr = parse_one(sql)
+ self.assertIsInstance(expr, exp.Insert)
+ self.assertIsInstance(expr.expression.expressions[0].expressions[0], cls)
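
Sketch of the ODBC escape-literal parsing covered by test_odbc_date_literals: {d ...}, {t ...} and {ts ...} values become dedicated date/time nodes inside the INSERT (illustrative only, not part of the diff):

from sqlglot import exp, parse_one

expr = parse_one("INSERT INTO tab(ds) VALUES ({d'2024-01-01'})")
assert isinstance(expr, exp.Insert)
assert expr.find(exp.Date) is not None  # the ODBC literal was parsed as exp.Date
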
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 21b59fd..83cbbd8 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -202,11 +202,11 @@ class TestSchema(unittest.TestCase):
dialect="clickhouse",
)
- table_z = exp.table_("z", db="y", catalog="x")
+ table_z = exp.table_("Z", db="y", catalog="x")
table_w = exp.table_("w", db="y", catalog="x")
self.assertEqual(schema.column_names(table_z), ["a", "B"])
- self.assertEqual(schema.column_names(table_w), ["c"])
+ self.assertEqual(schema.column_names(table_w), ["C"])
schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse")
self.assertEqual(schema.column_names(exp.table_("x")), ["y"])
diff --git a/tests/test_time.py b/tests/test_time.py
index bd0e63f..62d4361 100644
--- a/tests/test_time.py
+++ b/tests/test_time.py
@@ -1,6 +1,7 @@
import unittest
+import sys
-from sqlglot.time import format_time
+from sqlglot.time import format_time, subsecond_precision
class TestTime(unittest.TestCase):
@@ -12,3 +13,22 @@ class TestTime(unittest.TestCase):
self.assertEqual(format_time("aa", mapping), "c")
self.assertEqual(format_time("aaada", mapping), "cbdb")
self.assertEqual(format_time("da", mapping), "db")
+
+ def test_subsecond_precision(self):
+ self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456+00:00"))
+ self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.123+00:00"))
+ self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14+00:00"))
+ self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14"))
+ self.assertEqual(0, subsecond_precision("garbage"))
+
+ @unittest.skipUnless(
+ sys.version_info >= (3, 11),
+ "Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds",
+ )
+ def test_subsecond_precision_python311(self):
+ # ref: https://docs.python.org/3/whatsnew/3.11.html#datetime
+ self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456789+00:00"))
+ self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.12345+00:00"))
+ self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.1234+00:00"))
+ self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.12+00:00"))
+ self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.1+00:00"))
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 73d6705..e7d596c 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -3,6 +3,7 @@ import unittest
from sqlglot import parse_one
from sqlglot.transforms import (
eliminate_distinct_on,
+ eliminate_join_marks,
eliminate_qualify,
remove_precision_parameterized_types,
unalias_group,
@@ -12,9 +13,11 @@ from sqlglot.transforms import (
class TestTransforms(unittest.TestCase):
maxDiff = None
- def validate(self, transform, sql, target):
- with self.subTest(sql):
- self.assertEqual(parse_one(sql).transform(transform).sql(), target)
+ def validate(self, transform, sql, target, dialect=None):
+ with self.subTest(f"{dialect} - {sql}"):
+ self.assertEqual(
+ parse_one(sql, dialect=dialect).transform(transform).sql(dialect=dialect), target
+ )
def test_unalias_group(self):
self.validate(
@@ -138,3 +141,76 @@ class TestTransforms(unittest.TestCase):
"SELECT CAST(1 AS DECIMAL(10, 2)), CAST('13' AS VARCHAR(10))",
"SELECT CAST(1 AS DECIMAL), CAST('13' AS VARCHAR)",
)
+
+ def test_eliminate_join_marks(self):
+ for dialect in ("oracle", "redshift"):
+ self.validate(
+ eliminate_join_marks,
+ "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) > 5",
+ "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y > 5",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x (+) = T2.x and T2.y > 5",
+ "SELECT T1.d, T2.c FROM T2 LEFT JOIN T1 ON T1.x = T2.x WHERE T2.y > 5",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) IS NULL",
+ "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y IS NULL",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y IS NULL",
+ "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T2.y IS NULL",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T1.Z > 4",
+ "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T1.Z > 4",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT * FROM table1, table2 WHERE table1.col = table2.col(+)",
+ "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT * FROM table1, table2, table3, table4 WHERE table1.col = table2.col(+) and table2.col >= table3.col(+) and table1.col = table4.col(+)",
+ "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col LEFT JOIN table3 ON table2.col >= table3.col LEFT JOIN table4 ON table1.col = table4.col",
+ dialect,
+ )
+ self.validate(
+ eliminate_join_marks,
+ "SELECT * FROM table1, table2, table3 WHERE table1.col = table2.col(+) and table2.col >= table3.col(+)",
+ "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col LEFT JOIN table3 ON table2.col >= table3.col",
+ dialect,
+ )
+ # 2 join marks on one side of predicate
+ self.validate(
+ eliminate_join_marks,
+ "SELECT * FROM table1, table2 WHERE table1.col = table2.col1(+) + table2.col2(+)",
+ "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col1 + table2.col2",
+ dialect,
+ )
+ # join mark and expression
+ self.validate(
+ eliminate_join_marks,
+ "SELECT * FROM table1, table2 WHERE table1.col = table2.col1(+) + 25",
+ "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col1 + 25",
+ dialect,
+ )
+
+ alias = "AS " if dialect != "oracle" else ""
+ self.validate(
+ eliminate_join_marks,
+ "SELECT table1.id, table2.cloumn1, table3.id FROM table1, table2, (SELECT tableInner1.id FROM tableInner1, tableInner2 WHERE tableInner1.id = tableInner2.id(+)) AS table3 WHERE table1.id = table2.id(+) and table1.id = table3.id(+)",
+ f"SELECT table1.id, table2.cloumn1, table3.id FROM table1 LEFT JOIN table2 ON table1.id = table2.id LEFT JOIN (SELECT tableInner1.id FROM tableInner1 LEFT JOIN tableInner2 ON tableInner1.id = tableInner2.id) {alias}table3 ON table1.id = table3.id",
+ dialect,
+ )
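
Sketch of the transform covered by test_eliminate_join_marks: Oracle/Redshift (+) join markers are rewritten into explicit LEFT JOINs (input and expected output taken from one of the cases above; not part of the diff):

from sqlglot import parse_one
from sqlglot.transforms import eliminate_join_marks

sql = "SELECT * FROM table1, table2 WHERE table1.col = table2.col(+)"
rewritten = parse_one(sql, dialect="oracle").transform(eliminate_join_marks)
assert rewritten.sql(dialect="oracle") == (
    "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col"
)
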
diff --git a/tests/test_transpile.py b/tests/test_transpile.py
index dea9985..07a915d 100644
--- a/tests/test_transpile.py
+++ b/tests/test_transpile.py
@@ -112,6 +112,10 @@ class TestTranspile(unittest.TestCase):
def test_comments(self):
self.validate(
+ "select /* asfd /* asdf */ asdf */ 1",
+ "/* asfd /* asdf */ asdf */ SELECT 1",
+ )
+ self.validate(
"SELECT c /* foo */ AS alias",
"SELECT c AS alias /* foo */",
)
@@ -552,11 +556,15 @@ FROM x""",
)
self.validate(
- """SELECT X FROM catalog.db.table WHERE Y
+ """SELECT X FROM catalog.db.table WHERE Y
--
AND Z""",
"""SELECT X FROM catalog.db.table WHERE Y AND Z""",
)
+ self.validate(
+ """with a as /* comment */ ( select * from b) select * from a""",
+ """WITH a AS (SELECT * FROM b) /* comment */ SELECT * FROM a""",
+ )
def test_types(self):
self.validate("INT 1", "CAST(1 AS INT)")
@@ -585,24 +593,24 @@ FROM x""",
def test_extract(self):
self.validate(
"EXTRACT(day FROM '2020-01-01'::TIMESTAMP)",
- "EXTRACT(day FROM CAST('2020-01-01' AS TIMESTAMP))",
+ "EXTRACT(DAY FROM CAST('2020-01-01' AS TIMESTAMP))",
)
self.validate(
"EXTRACT(timezone FROM '2020-01-01'::TIMESTAMP)",
- "EXTRACT(timezone FROM CAST('2020-01-01' AS TIMESTAMP))",
+ "EXTRACT(TIMEZONE FROM CAST('2020-01-01' AS TIMESTAMP))",
)
self.validate(
"EXTRACT(year FROM '2020-01-01'::TIMESTAMP WITH TIME ZONE)",
- "EXTRACT(year FROM CAST('2020-01-01' AS TIMESTAMPTZ))",
+ "EXTRACT(YEAR FROM CAST('2020-01-01' AS TIMESTAMPTZ))",
)
self.validate(
"extract(month from '2021-01-31'::timestamp without time zone)",
- "EXTRACT(month FROM CAST('2021-01-31' AS TIMESTAMP))",
+ "EXTRACT(MONTH FROM CAST('2021-01-31' AS TIMESTAMP))",
)
- self.validate("extract(week from current_date + 2)", "EXTRACT(week FROM CURRENT_DATE + 2)")
+ self.validate("extract(week from current_date + 2)", "EXTRACT(WEEK FROM CURRENT_DATE + 2)")
self.validate(
"EXTRACT(minute FROM datetime1 - datetime2)",
- "EXTRACT(minute FROM datetime1 - datetime2)",
+ "EXTRACT(MINUTE FROM datetime1 - datetime2)",
)
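
Sketch of the casing change validated above: EXTRACT units are now generated in upper case (mirrors the week case; not part of the diff):

from sqlglot import transpile

assert transpile("extract(week from current_date + 2)")[0] == (
    "EXTRACT(WEEK FROM CURRENT_DATE + 2)"
)
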
def test_if(self):
@@ -725,6 +733,11 @@ FROM x""",
self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="hive")
self.validate("TIME_STR_TO_TIME(x)", "TIME_STR_TO_TIME(x)", write=None)
+ self.validate(
+ "TIME_STR_TO_TIME(x, 'America/Los_Angeles')",
+ "TIME_STR_TO_TIME(x, 'America/Los_Angeles')",
+ write=None,
+ )
self.validate("TIME_STR_TO_UNIX(x)", "TIME_STR_TO_UNIX(x)", write=None)
self.validate("TIME_TO_TIME_STR(x)", "CAST(x AS TEXT)", write=None)
self.validate("TIME_TO_STR(x, 'y')", "TIME_TO_STR(x, 'y')", write=None)
@@ -756,7 +769,7 @@ FROM x""",
self.validate("STR_TO_TIME('x', 'y')", "DATE_PARSE('x', 'y')", write="presto")
self.validate(
"STR_TO_UNIX('x', 'y')",
- "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('x' AS VARCHAR), 'y')), PARSE_DATETIME(CAST('x' AS VARCHAR), 'y')))",
+ "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('x' AS VARCHAR), 'y')), PARSE_DATETIME(DATE_FORMAT(CAST('x' AS TIMESTAMP), 'y'), 'y')))",
write="presto",
)
self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="presto")
@@ -807,10 +820,10 @@ FROM x""",
self.assertEqual(
cm.output,
[
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (-1)",
- "WARNING:sqlglot:Applying array index offset (1)",
- "WARNING:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (-1)",
+ "INFO:sqlglot:Applying array index offset (1)",
+ "INFO:sqlglot:Applying array index offset (1)",
],
)
@@ -837,7 +850,6 @@ FROM x""",
"ALTER TABLE table1 RENAME COLUMN c1 TO c2, c2 TO c3",
"ALTER TABLE table1 RENAME COLUMN c1 c2",
"ALTER TYPE electronic_mail RENAME TO email",
- "ALTER VIEW foo ALTER COLUMN bla SET DEFAULT 'NOT SET'",
"ALTER schema doo",
"ANALYZE a.y",
"CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')",
@@ -845,7 +857,6 @@ FROM x""",
"CREATE OR REPLACE STAGE",
"EXECUTE statement",
"EXPLAIN SELECT * FROM x",
- "GRANT INSERT ON foo TO bla",
"LOAD foo",
"OPTIMIZE TABLE y",
"PREPARE statement",